90 lines
2.4 KiB
Idris
90 lines
2.4 KiB
Idris
module Tests.Unicode
|
||
|
||
import Quox.NatExtra
|
||
import Quox.Unicode
|
||
import Data.List
|
||
import Data.String
|
||
import Data.Maybe
|
||
import TAP
|
||
|
||
|
||
maxLatin1 = '\xFF'
|
||
|
||
escape : Char -> Maybe String
|
||
escape '\'' = Nothing
|
||
escape c =
|
||
if c > maxLatin1 then Nothing else
|
||
case unpack $ show c of
|
||
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
|
||
_ => Nothing
|
||
|
||
codepoint : Char -> String
|
||
codepoint = padLeft 4 '0' . showHex . cast
|
||
|
||
display : Char -> String
|
||
display c =
|
||
let c' = fromMaybe (singleton c) $ escape c in
|
||
if '\x20' <= c && c <= maxLatin1
|
||
then "「\{c'}」"
|
||
else "「\{c'}」 (U+\{codepoint c})"
|
||
|
||
displayS' : String -> String
|
||
displayS' =
|
||
foldMap (\c => if c <= maxLatin1 then singleton c else "\\x\{codepoint c}") .
|
||
unpack
|
||
|
||
displayS : String -> String
|
||
displayS str =
|
||
if all (<= maxLatin1) (unpack str)
|
||
then "「\{str}」"
|
||
else "「\{str}」 (\"\{displayS' str}\")"
|
||
|
||
testOneChar : (Char -> Bool) -> Char -> Test
|
||
testOneChar pred c = test (display c) $ unless (pred c) $ Left ()
|
||
|
||
testAllChars : String -> (Char -> Bool) -> List Char -> Test
|
||
testAllChars label pred chars = label :- map (testOneChar pred) chars
|
||
|
||
|
||
testNfc : String -> String -> Test
|
||
testNfc input result =
|
||
test (displayS input) $
|
||
let norm = normalizeNfc input in
|
||
unless (norm == result) $
|
||
Left [("expected", displayS result), ("received", displayS norm)]
|
||
|
||
testAlreadyNfc : String -> Test
|
||
testAlreadyNfc input = testNfc input input
|
||
|
||
|
||
|
||
tests = "unicode" :- [
|
||
"general categories" :- [
|
||
testAllChars "id starts" isIdStart
|
||
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '𝟙'],
|
||
testAllChars "not id starts" (not . isIdStart)
|
||
['0', '_', '-', '‿', ' ', '[', ',', '.', '\1'],
|
||
testAllChars "id continuations" isIdCont
|
||
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '0', '\''],
|
||
testAllChars "not id continuations" (not . isIdCont)
|
||
['_', '‿', ' ', '[', ',', '.', '\1'],
|
||
testAllChars "id connectors" isIdConnector
|
||
['_', '‿'],
|
||
testAllChars "not id connectors" (not . isIdConnector)
|
||
['a', ' ', ',', '-'],
|
||
testAllChars "white space" isWhitespace
|
||
[' ', '\t', '\r', '\n',
|
||
'\x2028', -- line separator
|
||
'\x2029' -- paragraph separator
|
||
],
|
||
testAllChars "not white space" (not . isWhitespace)
|
||
['a', '-', '_', '\1']
|
||
],
|
||
|
||
"normalisation" :- [
|
||
testNfc "e\x301" "é",
|
||
testAlreadyNfc "é",
|
||
testAlreadyNfc ""
|
||
-- idk if this is wrong it's chez's fault. or unicode's
|
||
]
|
||
]
|