module Tests.Unicode import Quox.NatExtra import Quox.Unicode import Data.List import Data.String import Data.Maybe import TAP maxLatin1 = '\xFF' escape : Char -> Maybe String escape '\'' = Nothing escape c = if c > maxLatin1 then Nothing else case unpack $ show c of '\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs _ => Nothing codepoint : Char -> String codepoint = padLeft 4 '0' . showHex . cast display : Char -> String display c = let c' = fromMaybe (singleton c) $ escape c in if '\x20' <= c && c <= maxLatin1 then "「\{c'}」" else "「\{c'}」 (U+\{codepoint c})" displayS' : String -> String displayS' = foldMap (\c => if c <= maxLatin1 then singleton c else "\\x\{codepoint c}") . unpack displayS : String -> String displayS str = if all (<= maxLatin1) (unpack str) then "「\{str}」" else "「\{str}」 (\"\{displayS' str}\")" testOneChar : (Char -> Bool) -> Char -> Test testOneChar pred c = test (display c) $ unless (pred c) $ Left () testAllChars : String -> (Char -> Bool) -> List Char -> Test testAllChars label pred chars = label :- map (testOneChar pred) chars testNfc : String -> String -> Test testNfc input result = test (displayS input) $ let norm = normalizeNfc input in unless (norm == result) $ Left [("expected", displayS result), ("received", displayS norm)] testAlreadyNfc : String -> Test testAlreadyNfc input = testNfc input input tests = "unicode" :- [ "general categories" :- [ testAllChars "id starts" isIdStart ['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '𝟙'], testAllChars "not id starts" (not . isIdStart) ['0', '_', '-', '‿', ' ', '[', ',', '.', '\1'], testAllChars "id continuations" isIdCont ['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '0', '\''], testAllChars "not id continuations" (not . isIdCont) ['_', '‿', ' ', '[', ',', '.', '\1'], testAllChars "id connectors" isIdConnector ['_', '‿'], testAllChars "not id connectors" (not . isIdConnector) ['a', ' ', ',', '-'], testAllChars "white space" isWhitespace [' ', '\t', '\r', '\n', '\x2028', -- line separator '\x2029' -- paragraph separator ], testAllChars "not white space" (not . isWhitespace) ['a', '-', '_', '\1'] ], "normalisation" :- [ testNfc "e\x301" "é", testAlreadyNfc "é", testAlreadyNfc "" -- idk if this is wrong it's chez's fault. or unicode's ] ]