quox/tests/Tests/Unicode.idr
2022-05-12 07:42:33 +02:00

90 lines
2.4 KiB
Idris
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Tests.Unicode
import Quox.NatExtra
import Quox.Unicode
import Data.List
import Data.String
import Data.Maybe
import TAP
maxLatin1 = '\xFF'
escape : Char -> Maybe String
escape '\'' = Nothing
escape c =
if c > maxLatin1 then Nothing else
case unpack $ show c of
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
_ => Nothing
codepoint : Char -> String
codepoint = padLeft 4 '0' . showHex . cast
display : Char -> String
display c =
let c' = fromMaybe (singleton c) $ escape c in
if '\x20' <= c && c <= maxLatin1
then "\{c'}」"
else "\{c'}」 (U+\{codepoint c})"
displayS' : String -> String
displayS' =
foldMap (\c => if c <= maxLatin1 then singleton c else "\\x\{codepoint c}") .
unpack
displayS : String -> String
displayS str =
if all (<= maxLatin1) (unpack str)
then "\{str}」"
else "\{str}」 (\"\{displayS' str}\")"
testOneChar : (Char -> Bool) -> Char -> Test
testOneChar pred c = test (display c) $ unless (pred c) $ Left ()
testAllChars : String -> (Char -> Bool) -> List Char -> Test
testAllChars label pred chars = label :- map (testOneChar pred) chars
testNfc : String -> String -> Test
testNfc input result =
test (displayS input) $
let norm = normalizeNfc input in
unless (norm == result) $
Left [("expected", displayS result), ("received", displayS norm)]
testAlreadyNfc : String -> Test
testAlreadyNfc input = testNfc input input
tests = "unicode" :- [
"general categories" :- [
testAllChars "id starts" isIdStart
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '𝟙'],
testAllChars "not id starts" (not . isIdStart)
['0', '_', '-', '', ' ', '[', ',', '.', '\1'],
testAllChars "id continuations" isIdCont
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '0', '\''],
testAllChars "not id continuations" (not . isIdCont)
['_', '', ' ', '[', ',', '.', '\1'],
testAllChars "id connectors" isIdConnector
['_', ''],
testAllChars "not id connectors" (not . isIdConnector)
['a', ' ', ',', '-'],
testAllChars "white space" isWhitespace
[' ', '\t', '\r', '\n',
'\x2028', -- line separator
'\x2029' -- paragraph separator
],
testAllChars "not white space" (not . isWhitespace)
['a', '-', '_', '\1']
],
"normalisation" :- [
testNfc "e\x301" "é",
testAlreadyNfc "é",
testAlreadyNfc ""
-- idk if this is wrong it's chez's fault. or unicode's
]
]