string/nat lit stuff
This commit is contained in:
parent
3b9a339e5e
commit
e211887a34
5 changed files with 146 additions and 41 deletions
|
@ -166,3 +166,10 @@ isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || isSeparator ch
|
|||
export
|
||||
%foreign "scheme:string-normalize-nfc"
|
||||
normalizeNfc : String -> String
|
||||
|
||||
|
||||
export
|
||||
isCodepoint : Int -> Bool
|
||||
isCodepoint n =
|
||||
n <= 0x10FFFF &&
|
||||
not (n >= 0xD800 && n <= 0xDBFF || n >= 0xDC00 && n <= 0xDFFF)
|
||||
|
|
|
@ -4,6 +4,7 @@ import public Data.Nat
|
|||
import Data.Nat.Division
|
||||
import Data.SnocList
|
||||
import Data.Vect
|
||||
import Data.String
|
||||
|
||||
%default total
|
||||
|
||||
|
@ -52,6 +53,42 @@ parameters {base : Nat} {auto 0 _ : base `GTE` 2} (chars : Vect base Char)
|
|||
showAtBase : Nat -> String
|
||||
showAtBase = pack . showAtBase' []
|
||||
|
||||
export
|
||||
showHex : Nat -> String
|
||||
showHex = showAtBase $ fromList $ unpack "0123456789ABCDEF"
|
||||
namespace Nat
|
||||
export
|
||||
showHex : Nat -> String
|
||||
showHex = showAtBase $ fromList $ unpack "0123456789abcdef"
|
||||
|
||||
namespace Int
|
||||
export
|
||||
showHex : Int -> String
|
||||
showHex x =
|
||||
if x < 0 then "-" ++ Nat.showHex (cast (-x)) else Nat.showHex (cast x)
|
||||
|
||||
|
||||
namespace Int
|
||||
export
|
||||
fromHexit : Char -> Maybe Int
|
||||
fromHexit c =
|
||||
if c >= '0' && c <= '9' then Just $ ord c - ord '0'
|
||||
else if c >= 'a' && c <= 'f' then Just $ ord c - ord 'a' + 10
|
||||
else if c >= 'A' && c <= 'F' then Just $ ord c - ord 'A' + 10
|
||||
else Nothing
|
||||
|
||||
private
|
||||
fromHex' : Int -> String -> Maybe Int
|
||||
fromHex' acc str = case strM str of
|
||||
StrNil => Just acc
|
||||
StrCons c cs => fromHex' (16 * acc + !(fromHexit c)) (assert_smaller str cs)
|
||||
|
||||
export %inline
|
||||
fromHex : String -> Maybe Int
|
||||
fromHex = fromHex' 0
|
||||
|
||||
namespace Nat
|
||||
export
|
||||
fromHexit : Char -> Maybe Nat
|
||||
fromHexit = map cast . Int.fromHexit
|
||||
|
||||
export %inline
|
||||
fromHex : String -> Maybe Nat
|
||||
fromHex = map cast . Int.fromHex
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
module Quox.Parser.Lexer
|
||||
|
||||
import Quox.CharExtra
|
||||
import Quox.NatExtra
|
||||
import Quox.Name
|
||||
import Data.String.Extra
|
||||
import Data.SortedMap
|
||||
|
@ -72,29 +73,84 @@ tmatch t f = match t (T . f)
|
|||
|
||||
|
||||
export
|
||||
fromStringLit : String -> String
|
||||
fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
|
||||
go : List Char -> List Char
|
||||
go [] = []
|
||||
go ['\\'] = ['\\'] -- i guess???
|
||||
go ('\\' :: 'n' :: cs) = '\n' :: go cs
|
||||
go ('\\' :: 't' :: cs) = '\t' :: go cs
|
||||
fromStringLit : (String -> Token) -> String -> ExtToken
|
||||
fromStringLit f str =
|
||||
case go $ unpack $ drop 1 $ dropLast 1 str of
|
||||
Left err => Invalid err str
|
||||
Right ok => T $ f $ pack ok
|
||||
where
|
||||
Interpolation Char where interpolate = singleton
|
||||
|
||||
go, hexEscape : List Char -> Either String (List Char)
|
||||
|
||||
go [] = Right []
|
||||
go ['\\'] = Left "string ends with \\"
|
||||
go ('\\' :: 'n' :: cs) = ('\n' ::) <$> go cs
|
||||
go ('\\' :: 't' :: cs) = ('\t' ::) <$> go cs
|
||||
go ('\\' :: 'x' :: cs) = hexEscape cs
|
||||
go ('\\' :: 'X' :: cs) = hexEscape cs
|
||||
go ('\\' :: '\\' :: cs) = ('\\' ::) <$> go cs
|
||||
go ('\\' :: '"' :: cs) = ('"' ::) <$> go cs
|
||||
-- [todo] others
|
||||
go ('\\' :: c :: cs) = c :: go cs
|
||||
go (c :: cs) = c :: go cs
|
||||
go ('\\' :: c :: _) = Left "unknown escape '\{c}'"
|
||||
go (c :: cs) = (c ::) <$> go cs
|
||||
|
||||
hexEscape cs =
|
||||
case break (== ';') cs of
|
||||
(hs, ';' :: rest) => do
|
||||
let hs = pack hs
|
||||
let Just c = Int.fromHex hs
|
||||
| Nothing => Left #"invalid hex string "\#{hs}" in escape"#
|
||||
if isCodepoint c
|
||||
then (chr c ::) <$> go (assert_smaller cs rest)
|
||||
else Left "codepoint \{hs} out of range"
|
||||
_ => Left "unterminated hex escape"
|
||||
|
||||
private
|
||||
string : Tokenizer ExtToken
|
||||
string = tmatch stringLit (Str . fromStringLit)
|
||||
string = match stringLit $ fromStringLit Str
|
||||
|
||||
|
||||
%hide binLit
|
||||
%hide octLit
|
||||
%hide hexLit
|
||||
|
||||
private
|
||||
nat : Tokenizer ExtToken
|
||||
nat = tmatch (some (range '0' '9')) (Nat . cast)
|
||||
nat = match hexLit fromHexLit
|
||||
<|> tmatch decLit fromDecLit
|
||||
where
|
||||
withUnderscores : Lexer -> Lexer
|
||||
withUnderscores l = l <+> many (opt (is '_') <+> l)
|
||||
|
||||
withoutUnderscores : String -> String
|
||||
withoutUnderscores = pack . go . unpack where
|
||||
go : List Char -> List Char
|
||||
go [] = []
|
||||
go ('_' :: cs) = go cs
|
||||
go (c :: cs) = c :: go cs
|
||||
|
||||
decLit =
|
||||
withUnderscores (range '0' '9') <+> reject idContEnd
|
||||
|
||||
hexLit =
|
||||
approx "0x" <+>
|
||||
withUnderscores (range '0' '9' <|> range 'a' 'f' <|> range 'A' 'F') <+>
|
||||
reject idContEnd
|
||||
|
||||
fromDecLit : String -> Token
|
||||
fromDecLit = Nat . cast . withoutUnderscores
|
||||
|
||||
fromHexLit : String -> ExtToken
|
||||
fromHexLit str =
|
||||
maybe (Invalid "invalid hex sequence" str) (T . Nat) $
|
||||
fromHex $ withoutUnderscores $ drop 2 str
|
||||
|
||||
|
||||
private
|
||||
tag : Tokenizer ExtToken
|
||||
tag = tmatch (is '\'' <+> name) (Tag . drop 1)
|
||||
<|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
|
||||
<|> match (is '\'' <+> stringLit) (fromStringLit Tag . drop 1)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import Quox.Pretty
|
|||
|
||||
import Quox.EffExtra
|
||||
import Quox.CharExtra
|
||||
import Quox.NatExtra
|
||||
import Data.DPair
|
||||
import Data.List1
|
||||
import Data.String
|
||||
|
@ -278,27 +279,15 @@ prelude = """
|
|||
;;;;;;
|
||||
"""
|
||||
|
||||
export
|
||||
toHex : Int -> String
|
||||
toHex x =
|
||||
case toHex' x of
|
||||
[<] => "0"
|
||||
cs => concatMap singleton cs
|
||||
where
|
||||
toHex' : Int -> SnocList Char
|
||||
toHex' x =
|
||||
if x == 0 then [<] else
|
||||
let d = x `div` 16
|
||||
m = x `mod` 16 in
|
||||
toHex' (assert_smaller x d) :<
|
||||
assert_total strIndex "0123456789abcdef" m
|
||||
|
||||
export
|
||||
escape : String -> String
|
||||
escape = concatMap esc1 . unpack where
|
||||
escape = foldMap esc1 . unpack where
|
||||
esc1 : Char -> String
|
||||
esc1 c = if c < ' ' || c > '~' || c == '\\' || c == '"'
|
||||
then "\\x" ++ toHex (ord c) ++ ";"
|
||||
esc1 c =
|
||||
if c == '\\' || c == '"' then
|
||||
"\\" ++ singleton c
|
||||
else if c < ' ' || c > '~' then
|
||||
"\\x" ++ showHex (ord c) ++ ";"
|
||||
else singleton c
|
||||
|
||||
export covering
|
||||
|
|
|
@ -153,10 +153,26 @@ tests = "lexer" :- [
|
|||
lexes #" "\"" "# [Str "\""],
|
||||
lexes #" "\\" "# [Str "\\"],
|
||||
lexes #" "\\\"" "# [Str "\\\""],
|
||||
todo "other escapes"
|
||||
lexes #" "\t" "# [Str "\t"],
|
||||
lexes #" "\n" "# [Str "\n"],
|
||||
lexes #" "🐉" "# [Str "🐉"],
|
||||
lexes #" "\x1f409;" "# [Str "🐉"],
|
||||
lexFail #" "\q" "#,
|
||||
lexFail #" "\" "#
|
||||
],
|
||||
|
||||
todo "naturals",
|
||||
"naturals" :- [
|
||||
lexes "0" [Nat 0],
|
||||
lexes "123" [Nat 123],
|
||||
lexes "69_420" [Nat 69420],
|
||||
lexes "0x123" [Nat 0x123],
|
||||
lexes "0xbeef" [Nat 0xbeef],
|
||||
lexes "0xBEEF" [Nat 0xBEEF],
|
||||
lexes "0XBEEF" [Nat 0xBEEF],
|
||||
lexes "0xbaba_baba" [Nat 0xbabababa],
|
||||
lexFail "123abc",
|
||||
lexFail "0x123abcghi"
|
||||
],
|
||||
|
||||
"universes" :- [
|
||||
lexes "Type0" [TYPE 0],
|
||||
|
|
Loading…
Reference in a new issue