char class stuff
This commit is contained in:
parent
123e4b6ab4
commit
c743a99356
5 changed files with 97 additions and 8 deletions
|
@ -96,16 +96,12 @@ genCat ch = assert_total $
|
||||||
"Cn" => Other NotAssigned
|
"Cn" => Other NotAssigned
|
||||||
|
|
||||||
|
|
||||||
export
|
|
||||||
isPrintable : Char -> Bool
|
|
||||||
isPrintable ch = case genCat ch of Other _ => False; _ => True
|
|
||||||
|
|
||||||
export
|
export
|
||||||
isIdStart : Char -> Bool
|
isIdStart : Char -> Bool
|
||||||
isIdStart ch =
|
isIdStart ch =
|
||||||
case genCat ch of
|
case genCat ch of
|
||||||
Letter _ => True
|
Letter _ => True
|
||||||
Punctuation Connector => True -- _, tie bars, etc
|
Number _ => not ('0' <= ch && ch <= '9')
|
||||||
_ => False
|
_ => False
|
||||||
|
|
||||||
export
|
export
|
||||||
|
@ -117,14 +113,23 @@ isIdCont ch =
|
||||||
Number _ => True
|
Number _ => True
|
||||||
_ => False
|
_ => False
|
||||||
|
|
||||||
|
export
|
||||||
|
isIdConnector : Char -> Bool
|
||||||
|
isIdConnector ch =
|
||||||
|
case genCat ch of Punctuation Connector => True; _ => False
|
||||||
|
|
||||||
|
|
||||||
export
|
export
|
||||||
isSymChar : Char -> Bool
|
isSymChar : Char -> Bool
|
||||||
isSymChar ch =
|
isSymChar ch =
|
||||||
case genCat ch of
|
case genCat ch of
|
||||||
Symbol _ => True
|
Symbol _ => True
|
||||||
|
Punctuation Dash => True
|
||||||
Punctuation Other => True
|
Punctuation Other => True
|
||||||
_ => False
|
_ => False
|
||||||
|
|
||||||
export
|
export
|
||||||
isWhitespace : Char -> Bool
|
isWhitespace : Char -> Bool
|
||||||
isWhitespace ch = case genCat ch of Separator _ => True; _ => False
|
isWhitespace ch =
|
||||||
|
ch == '\t' || ch == '\r' || ch == '\n' ||
|
||||||
|
case genCat ch of Separator _ => True; _ => False
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
module Quox.NatExtra
|
module Quox.NatExtra
|
||||||
|
|
||||||
import public Data.Nat
|
import public Data.Nat
|
||||||
|
import Data.Nat.Division
|
||||||
|
import Data.SnocList
|
||||||
|
import Data.Vect
|
||||||
|
|
||||||
%default total
|
%default total
|
||||||
|
|
||||||
|
@ -30,3 +33,25 @@ public export
|
||||||
toLte : {n : Nat} -> m `LTE'` n -> m `LTE` n
|
toLte : {n : Nat} -> m `LTE'` n -> m `LTE` n
|
||||||
toLte LTERefl = reflexive
|
toLte LTERefl = reflexive
|
||||||
toLte (LTESuccR p) = lteSuccRight (toLte p)
|
toLte (LTESuccR p) = lteSuccRight (toLte p)
|
||||||
|
|
||||||
|
|
||||||
|
private
|
||||||
|
0 baseNZ : n `GTE` 2 => NonZero n
|
||||||
|
baseNZ @{LTESucc _} = SIsNonZero
|
||||||
|
|
||||||
|
parameters {base : Nat} {auto 0 _ : base `GTE` 2} (chars : Vect base Char)
|
||||||
|
private
|
||||||
|
showAtBase' : List Char -> Nat -> List Char
|
||||||
|
showAtBase' acc 0 = acc
|
||||||
|
showAtBase' acc k =
|
||||||
|
let dig = natToFinLT (modNatNZ k base baseNZ) @{boundModNatNZ {}} in
|
||||||
|
showAtBase' (index dig chars :: acc)
|
||||||
|
(assert_smaller k $ divNatNZ k base baseNZ)
|
||||||
|
|
||||||
|
export
|
||||||
|
showAtBase : Nat -> String
|
||||||
|
showAtBase = pack . showAtBase' []
|
||||||
|
|
||||||
|
export
|
||||||
|
showHex : Nat -> String
|
||||||
|
showHex = showAtBase $ fromList $ unpack "0123456789ABCDEF"
|
||||||
|
|
|
@ -9,6 +9,7 @@ depends = base, contrib, elab-util, sop, snocvect
|
||||||
|
|
||||||
modules =
|
modules =
|
||||||
Quox.NatExtra,
|
Quox.NatExtra,
|
||||||
|
Quox.CharExtra,
|
||||||
Quox.OPE,
|
Quox.OPE,
|
||||||
Quox.Pretty,
|
Quox.Pretty,
|
||||||
Quox.Syntax,
|
Quox.Syntax,
|
||||||
|
|
|
@ -2,6 +2,7 @@ module Tests
|
||||||
|
|
||||||
import Options
|
import Options
|
||||||
import TAP
|
import TAP
|
||||||
|
import Tests.CharExtra
|
||||||
import Tests.Lexer
|
import Tests.Lexer
|
||||||
import Tests.Parser
|
import Tests.Parser
|
||||||
import Tests.Equal
|
import Tests.Equal
|
||||||
|
@ -9,6 +10,7 @@ import System
|
||||||
|
|
||||||
|
|
||||||
allTests = [
|
allTests = [
|
||||||
|
CharExtra.tests,
|
||||||
Lexer.tests,
|
Lexer.tests,
|
||||||
Parser.tests,
|
Parser.tests,
|
||||||
Equal.tests
|
Equal.tests
|
||||||
|
|
56
tests/Tests/CharExtra.idr
Normal file
56
tests/Tests/CharExtra.idr
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
module Tests.CharExtra
|
||||||
|
|
||||||
|
import Quox.NatExtra
|
||||||
|
import Quox.CharExtra
|
||||||
|
import Data.List
|
||||||
|
import Data.String
|
||||||
|
import Data.Maybe
|
||||||
|
import TAP
|
||||||
|
|
||||||
|
|
||||||
|
escape : Char -> Maybe String
|
||||||
|
escape '\'' = Nothing
|
||||||
|
escape c =
|
||||||
|
if ord c > 0xFF then Nothing else
|
||||||
|
case unpack $ show c of
|
||||||
|
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
|
||||||
|
_ => Nothing
|
||||||
|
|
||||||
|
display : Char -> String
|
||||||
|
display c =
|
||||||
|
let ord = cast c {to = Nat} in
|
||||||
|
let c' = fromMaybe (singleton c) $ escape c in
|
||||||
|
if 0x20 <= ord && ord <= 0xFF then
|
||||||
|
"「\{c'}」"
|
||||||
|
else
|
||||||
|
let codepoint = padLeft 4 '0' $ showHex ord in
|
||||||
|
"「\{c'}」 (U+\{codepoint})"
|
||||||
|
|
||||||
|
testOne : (Char -> Bool) -> Char -> Test
|
||||||
|
testOne pred c = test (display c) $ unless (pred c) $ Left ()
|
||||||
|
|
||||||
|
testAll : String -> (Char -> Bool) -> List Char -> Test
|
||||||
|
testAll label pred chars = label :- map (testOne pred) chars
|
||||||
|
|
||||||
|
|
||||||
|
tests = "char extras" :- [
|
||||||
|
testAll "id starts" isIdStart
|
||||||
|
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '𝟙'],
|
||||||
|
testAll "not id starts" (not . isIdStart)
|
||||||
|
['0', '_', '-', '‿', ' ', '[', ',', '.', '\1'],
|
||||||
|
testAll "id continuations" isIdCont
|
||||||
|
['a', 'á', '𝕕', '개', 'ʨ', '𒁙', '0', '\''],
|
||||||
|
testAll "not id continuations" (not . isIdCont)
|
||||||
|
['_', '‿', ' ', '[', ',', '.', '\1'],
|
||||||
|
testAll "id connectors" isIdConnector
|
||||||
|
['_', '‿'],
|
||||||
|
testAll "not id connectors" (not . isIdConnector)
|
||||||
|
['a', ' ', ',', '-'],
|
||||||
|
testAll "white space" isWhitespace
|
||||||
|
[' ', '\t', '\r', '\n',
|
||||||
|
'\x2028', -- line separator
|
||||||
|
'\x2029' -- paragraph separator
|
||||||
|
],
|
||||||
|
testAll "not white space" (not . isWhitespace)
|
||||||
|
['a', '-', '_', '\1']
|
||||||
|
]
|
Loading…
Reference in a new issue