diff --git a/lib/Quox/CharExtra.idr b/lib/Quox/CharExtra.idr index c9f718b..52b9fa0 100644 --- a/lib/Quox/CharExtra.idr +++ b/lib/Quox/CharExtra.idr @@ -96,17 +96,13 @@ genCat ch = assert_total $ "Cn" => Other NotAssigned -export -isPrintable : Char -> Bool -isPrintable ch = case genCat ch of Other _ => False; _ => True - export isIdStart : Char -> Bool isIdStart ch = case genCat ch of - Letter _ => True - Punctuation Connector => True -- _, tie bars, etc - _ => False + Letter _ => True + Number _ => not ('0' <= ch && ch <= '9') + _ => False export isIdCont : Char -> Bool @@ -117,14 +113,23 @@ isIdCont ch = Number _ => True _ => False +export +isIdConnector : Char -> Bool +isIdConnector ch = + case genCat ch of Punctuation Connector => True; _ => False + + export isSymChar : Char -> Bool isSymChar ch = case genCat ch of Symbol _ => True + Punctuation Dash => True Punctuation Other => True _ => False export isWhitespace : Char -> Bool -isWhitespace ch = case genCat ch of Separator _ => True; _ => False +isWhitespace ch = + ch == '\t' || ch == '\r' || ch == '\n' || + case genCat ch of Separator _ => True; _ => False diff --git a/lib/Quox/NatExtra.idr b/lib/Quox/NatExtra.idr index 9e7d5de..42f627e 100644 --- a/lib/Quox/NatExtra.idr +++ b/lib/Quox/NatExtra.idr @@ -1,6 +1,9 @@ module Quox.NatExtra import public Data.Nat +import Data.Nat.Division +import Data.SnocList +import Data.Vect %default total @@ -30,3 +33,25 @@ public export toLte : {n : Nat} -> m `LTE'` n -> m `LTE` n toLte LTERefl = reflexive toLte (LTESuccR p) = lteSuccRight (toLte p) + + +private +0 baseNZ : n `GTE` 2 => NonZero n +baseNZ @{LTESucc _} = SIsNonZero + +parameters {base : Nat} {auto 0 _ : base `GTE` 2} (chars : Vect base Char) + private + showAtBase' : List Char -> Nat -> List Char + showAtBase' acc 0 = acc + showAtBase' acc k = + let dig = natToFinLT (modNatNZ k base baseNZ) @{boundModNatNZ {}} in + showAtBase' (index dig chars :: acc) + (assert_smaller k $ divNatNZ k base baseNZ) + + export + showAtBase : Nat -> String + showAtBase = pack . showAtBase' [] + +export +showHex : Nat -> String +showHex = showAtBase $ fromList $ unpack "0123456789ABCDEF" diff --git a/lib/quox-lib.ipkg b/lib/quox-lib.ipkg index 5334aff..dd5a02a 100644 --- a/lib/quox-lib.ipkg +++ b/lib/quox-lib.ipkg @@ -9,6 +9,7 @@ depends = base, contrib, elab-util, sop, snocvect modules = Quox.NatExtra, + Quox.CharExtra, Quox.OPE, Quox.Pretty, Quox.Syntax, diff --git a/tests/Tests.idr b/tests/Tests.idr index a1fe8c6..b6a7462 100644 --- a/tests/Tests.idr +++ b/tests/Tests.idr @@ -2,6 +2,7 @@ module Tests import Options import TAP +import Tests.CharExtra import Tests.Lexer import Tests.Parser import Tests.Equal @@ -9,6 +10,7 @@ import System allTests = [ + CharExtra.tests, Lexer.tests, Parser.tests, Equal.tests diff --git a/tests/Tests/CharExtra.idr b/tests/Tests/CharExtra.idr new file mode 100644 index 0000000..594c25a --- /dev/null +++ b/tests/Tests/CharExtra.idr @@ -0,0 +1,56 @@ +module Tests.CharExtra + +import Quox.NatExtra +import Quox.CharExtra +import Data.List +import Data.String +import Data.Maybe +import TAP + + +escape : Char -> Maybe String +escape '\'' = Nothing +escape c = + if ord c > 0xFF then Nothing else + case unpack $ show c of + '\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs + _ => Nothing + +display : Char -> String +display c = + let ord = cast c {to = Nat} in + let c' = fromMaybe (singleton c) $ escape c in + if 0x20 <= ord && ord <= 0xFF then + "「\{c'}」" + else + let codepoint = padLeft 4 '0' $ showHex ord in + "「\{c'}」 (U+\{codepoint})" + +testOne : (Char -> Bool) -> Char -> Test +testOne pred c = test (display c) $ unless (pred c) $ Left () + +testAll : String -> (Char -> Bool) -> List Char -> Test +testAll label pred chars = label :- map (testOne pred) chars + + +tests = "char extras" :- [ + testAll "id starts" isIdStart + ['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '𝟙'], + testAll "not id starts" (not . isIdStart) + ['0', '_', '-', '‿', ' ', '[', ',', '.', '\1'], + testAll "id continuations" isIdCont + ['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '0', '\''], + testAll "not id continuations" (not . isIdCont) + ['_', '‿', ' ', '[', ',', '.', '\1'], + testAll "id connectors" isIdConnector + ['_', '‿'], + testAll "not id connectors" (not . isIdConnector) + ['a', ' ', ',', '-'], + testAll "white space" isWhitespace + [' ', '\t', '\r', '\n', + '\x2028', -- line separator + '\x2029' -- paragraph separator + ], + testAll "not white space" (not . isWhitespace) + ['a', '-', '_', '\1'] +]