char class stuff

This commit is contained in:
rhiannon morris 2022-05-10 22:40:44 +02:00
parent 123e4b6ab4
commit c743a99356
5 changed files with 97 additions and 8 deletions

View file

@ -96,16 +96,12 @@ genCat ch = assert_total $
"Cn" => Other NotAssigned "Cn" => Other NotAssigned
export
isPrintable : Char -> Bool
isPrintable ch = case genCat ch of Other _ => False; _ => True
export export
isIdStart : Char -> Bool isIdStart : Char -> Bool
isIdStart ch = isIdStart ch =
case genCat ch of case genCat ch of
Letter _ => True Letter _ => True
Punctuation Connector => True -- _, tie bars, etc Number _ => not ('0' <= ch && ch <= '9')
_ => False _ => False
export export
@ -117,14 +113,23 @@ isIdCont ch =
Number _ => True Number _ => True
_ => False _ => False
export
isIdConnector : Char -> Bool
isIdConnector ch =
case genCat ch of Punctuation Connector => True; _ => False
export export
isSymChar : Char -> Bool isSymChar : Char -> Bool
isSymChar ch = isSymChar ch =
case genCat ch of case genCat ch of
Symbol _ => True Symbol _ => True
Punctuation Dash => True
Punctuation Other => True Punctuation Other => True
_ => False _ => False
export export
isWhitespace : Char -> Bool isWhitespace : Char -> Bool
isWhitespace ch = case genCat ch of Separator _ => True; _ => False isWhitespace ch =
ch == '\t' || ch == '\r' || ch == '\n' ||
case genCat ch of Separator _ => True; _ => False

View file

@ -1,6 +1,9 @@
module Quox.NatExtra module Quox.NatExtra
import public Data.Nat import public Data.Nat
import Data.Nat.Division
import Data.SnocList
import Data.Vect
%default total %default total
@ -30,3 +33,25 @@ public export
toLte : {n : Nat} -> m `LTE'` n -> m `LTE` n toLte : {n : Nat} -> m `LTE'` n -> m `LTE` n
toLte LTERefl = reflexive toLte LTERefl = reflexive
toLte (LTESuccR p) = lteSuccRight (toLte p) toLte (LTESuccR p) = lteSuccRight (toLte p)
private
0 baseNZ : n `GTE` 2 => NonZero n
baseNZ @{LTESucc _} = SIsNonZero
parameters {base : Nat} {auto 0 _ : base `GTE` 2} (chars : Vect base Char)
private
showAtBase' : List Char -> Nat -> List Char
showAtBase' acc 0 = acc
showAtBase' acc k =
let dig = natToFinLT (modNatNZ k base baseNZ) @{boundModNatNZ {}} in
showAtBase' (index dig chars :: acc)
(assert_smaller k $ divNatNZ k base baseNZ)
export
showAtBase : Nat -> String
showAtBase = pack . showAtBase' []
export
showHex : Nat -> String
showHex = showAtBase $ fromList $ unpack "0123456789ABCDEF"

View file

@ -9,6 +9,7 @@ depends = base, contrib, elab-util, sop, snocvect
modules = modules =
Quox.NatExtra, Quox.NatExtra,
Quox.CharExtra,
Quox.OPE, Quox.OPE,
Quox.Pretty, Quox.Pretty,
Quox.Syntax, Quox.Syntax,

View file

@ -2,6 +2,7 @@ module Tests
import Options import Options
import TAP import TAP
import Tests.CharExtra
import Tests.Lexer import Tests.Lexer
import Tests.Parser import Tests.Parser
import Tests.Equal import Tests.Equal
@ -9,6 +10,7 @@ import System
allTests = [ allTests = [
CharExtra.tests,
Lexer.tests, Lexer.tests,
Parser.tests, Parser.tests,
Equal.tests Equal.tests

56
tests/Tests/CharExtra.idr Normal file
View file

@ -0,0 +1,56 @@
module Tests.CharExtra
import Quox.NatExtra
import Quox.CharExtra
import Data.List
import Data.String
import Data.Maybe
import TAP
escape : Char -> Maybe String
escape '\'' = Nothing
escape c =
if ord c > 0xFF then Nothing else
case unpack $ show c of
'\'' :: '\\' :: cs => pack . ('\\' ::) <$> init' cs
_ => Nothing
display : Char -> String
display c =
let ord = cast c {to = Nat} in
let c' = fromMaybe (singleton c) $ escape c in
if 0x20 <= ord && ord <= 0xFF then
"\{c'}」"
else
let codepoint = padLeft 4 '0' $ showHex ord in
"\{c'}」 (U+\{codepoint})"
testOne : (Char -> Bool) -> Char -> Test
testOne pred c = test (display c) $ unless (pred c) $ Left ()
testAll : String -> (Char -> Bool) -> List Char -> Test
testAll label pred chars = label :- map (testOne pred) chars
tests = "char extras" :- [
testAll "id starts" isIdStart
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '𝟙'],
testAll "not id starts" (not . isIdStart)
['0', '_', '-', '', ' ', '[', ',', '.', '\1'],
testAll "id continuations" isIdCont
['a', 'á', '𝕕', '', 'ʨ', '𒁙', '0', '\''],
testAll "not id continuations" (not . isIdCont)
['_', '', ' ', '[', ',', '.', '\1'],
testAll "id connectors" isIdConnector
['_', ''],
testAll "not id connectors" (not . isIdConnector)
['a', ' ', ',', '-'],
testAll "white space" isWhitespace
[' ', '\t', '\r', '\n',
'\x2028', -- line separator
'\x2029' -- paragraph separator
],
testAll "not white space" (not . isWhitespace)
['a', '-', '_', '\1']
]