91 lines
2.4 KiB
Idris
91 lines
2.4 KiB
Idris
|
module Tests.Unicode
|
|||
|
|
|||
|
import Quox.NatExtra
|
|||
|
import Quox.Unicode
|
|||
|
import Data.List
|
|||
|
import Data.String
|
|||
|
import Data.Maybe
|
|||
|
import TAP
|
|||
|
|
|||
|
|
|||
|
maxLatin1 = '\xFF'
|
|||
|
|
|||
|
escape : Char -> Maybe String
|
|||
|
escape '\'' = Nothing
|
|||
|
escape c =
|
|||
|
if c > maxLatin1 then Nothing else
|
|||
|
case unpack $ show c of
|
|||
|
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
|
|||
|
_ => Nothing
|
|||
|
|
|||
|
codepoint : Char -> String
|
|||
|
codepoint = padLeft 4 '0' . showHex . cast
|
|||
|
|
|||
|
display : Char -> String
|
|||
|
display c =
|
|||
|
let c' = fromMaybe (singleton c) $ escape c in
|
|||
|
if '\x20' <= c && c <= maxLatin1
|
|||
|
then "「\{c'}」"
|
|||
|
else "「\{c'}」 (U+\{codepoint c})"
|
|||
|
|
|||
|
displayS' : String -> String
|
|||
|
displayS' =
|
|||
|
foldMap (\c => if c <= maxLatin1 then singleton c else "\\x\{codepoint c}") .
|
|||
|
unpack
|
|||
|
|
|||
|
displayS : String -> String
|
|||
|
displayS str =
|
|||
|
if all (<= maxLatin1) (unpack str)
|
|||
|
then "「\{str}」"
|
|||
|
else "「\{str}」 (\"\{displayS' str}\")"
|
|||
|
|
|||
|
testOneChar : (Char -> Bool) -> Char -> Test
|
|||
|
testOneChar pred c = test (display c) $ unless (pred c) $ Left ()
|
|||
|
|
|||
|
testAllChars : String -> (Char -> Bool) -> List Char -> Test
|
|||
|
testAllChars label pred chars = label :- map (testOneChar pred) chars
|
|||
|
|
|||
|
|
|||
|
testNfc : String -> String -> Test
|
|||
|
testNfc input result =
|
|||
|
test (displayS input) $
|
|||
|
let norm = normalizeNfc input in
|
|||
|
unless (norm == result) $
|
|||
|
Left [("expected", displayS result), ("received", displayS norm)]
|
|||
|
|
|||
|
testAlreadyNfc : String -> Test
|
|||
|
testAlreadyNfc input = testNfc input input
|
|||
|
|
|||
|
|
|||
|
|
|||
|
tests = "unicode" :- [
|
|||
|
"general categories" :- [
|
|||
|
testAllChars "id starts" isIdStart
|
|||
|
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '𝟙'],
|
|||
|
testAllChars "not id starts" (not . isIdStart)
|
|||
|
['0', '_', '-', '‿', ' ', '[', ',', '.', '\1'],
|
|||
|
testAllChars "id continuations" isIdCont
|
|||
|
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '0', '\''],
|
|||
|
testAllChars "not id continuations" (not . isIdCont)
|
|||
|
['_', '‿', ' ', '[', ',', '.', '\1'],
|
|||
|
testAllChars "id connectors" isIdConnector
|
|||
|
['_', '‿'],
|
|||
|
testAllChars "not id connectors" (not . isIdConnector)
|
|||
|
['a', ' ', ',', '-'],
|
|||
|
testAllChars "white space" isWhitespace
|
|||
|
[' ', '\t', '\r', '\n',
|
|||
|
'\x2028', -- line separator
|
|||
|
'\x2029' -- paragraph separator
|
|||
|
],
|
|||
|
testAllChars "not white space" (not . isWhitespace)
|
|||
|
['a', '-', '_', '\1']
|
|||
|
],
|
|||
|
|
|||
|
"normalisation" :- [
|
|||
|
testNfc "e\x301" "é",
|
|||
|
testAlreadyNfc "é",
|
|||
|
testAlreadyNfc ""
|
|||
|
-- idk if this is wrong it's chez's fault. or unicode's
|
|||
|
]
|
|||
|
]
|