quox/tests/Tests/Unicode.idr

91 lines
2.4 KiB
Idris
Raw Permalink Normal View History

2022-05-12 01:41:58 -04:00
module Tests.Unicode
import Quox.NatExtra
import Quox.Unicode
import Data.List
import Data.String
import Data.Maybe
import TAP
maxLatin1 = '\xFF'
escape : Char -> Maybe String
escape '\'' = Nothing
escape c =
if c > maxLatin1 then Nothing else
case unpack $ show c of
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
_ => Nothing
codepoint : Char -> String
codepoint = padLeft 4 '0' . showHex . cast
display : Char -> String
display c =
let c' = fromMaybe (singleton c) $ escape c in
if '\x20' <= c && c <= maxLatin1
then "\{c'}」"
else "\{c'}」 (U+\{codepoint c})"
displayS' : String -> String
displayS' =
foldMap (\c => if c <= maxLatin1 then singleton c else "\\x\{codepoint c}") .
unpack
displayS : String -> String
displayS str =
if all (<= maxLatin1) (unpack str)
then "\{str}」"
else "\{str}」 (\"\{displayS' str}\")"
testOneChar : (Char -> Bool) -> Char -> Test
testOneChar pred c = test (display c) $ unless (pred c) $ Left ()
testAllChars : String -> (Char -> Bool) -> List Char -> Test
testAllChars label pred chars = label :- map (testOneChar pred) chars
testNfc : String -> String -> Test
testNfc input result =
test (displayS input) $
let norm = normalizeNfc input in
unless (norm == result) $
Left [("expected", displayS result), ("received", displayS norm)]
testAlreadyNfc : String -> Test
testAlreadyNfc input = testNfc input input
tests = "unicode" :- [
"general categories" :- [
testAllChars "id starts" isIdStart
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '𝟙'],
testAllChars "not id starts" (not . isIdStart)
['0', '_', '-', '', ' ', '[', ',', '.', '\1'],
testAllChars "id continuations" isIdCont
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '0', '\''],
testAllChars "not id continuations" (not . isIdCont)
['_', '', ' ', '[', ',', '.', '\1'],
testAllChars "id connectors" isIdConnector
['_', ''],
testAllChars "not id connectors" (not . isIdConnector)
['a', ' ', ',', '-'],
testAllChars "white space" isWhitespace
[' ', '\t', '\r', '\n',
'\x2028', -- line separator
'\x2029' -- paragraph separator
],
testAllChars "not white space" (not . isWhitespace)
['a', '-', '_', '\1']
],
"normalisation" :- [
testNfc "e\x301" "é",
testAlreadyNfc "é",
testAlreadyNfc ""
-- idk if this is wrong it's chez's fault. or unicode's
]
]