string/nat lit stuff
This commit is contained in:
parent
3b9a339e5e
commit
e211887a34
5 changed files with 146 additions and 41 deletions
|
@ -166,3 +166,10 @@ isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || isSeparator ch
|
||||||
export
|
export
|
||||||
%foreign "scheme:string-normalize-nfc"
|
%foreign "scheme:string-normalize-nfc"
|
||||||
normalizeNfc : String -> String
|
normalizeNfc : String -> String
|
||||||
|
|
||||||
|
|
||||||
|
export
|
||||||
|
isCodepoint : Int -> Bool
|
||||||
|
isCodepoint n =
|
||||||
|
n <= 0x10FFFF &&
|
||||||
|
not (n >= 0xD800 && n <= 0xDBFF || n >= 0xDC00 && n <= 0xDFFF)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import public Data.Nat
|
||||||
import Data.Nat.Division
|
import Data.Nat.Division
|
||||||
import Data.SnocList
|
import Data.SnocList
|
||||||
import Data.Vect
|
import Data.Vect
|
||||||
|
import Data.String
|
||||||
|
|
||||||
%default total
|
%default total
|
||||||
|
|
||||||
|
@ -52,6 +53,42 @@ parameters {base : Nat} {auto 0 _ : base `GTE` 2} (chars : Vect base Char)
|
||||||
showAtBase : Nat -> String
|
showAtBase : Nat -> String
|
||||||
showAtBase = pack . showAtBase' []
|
showAtBase = pack . showAtBase' []
|
||||||
|
|
||||||
export
|
namespace Nat
|
||||||
showHex : Nat -> String
|
export
|
||||||
showHex = showAtBase $ fromList $ unpack "0123456789ABCDEF"
|
showHex : Nat -> String
|
||||||
|
showHex = showAtBase $ fromList $ unpack "0123456789abcdef"
|
||||||
|
|
||||||
|
namespace Int
|
||||||
|
export
|
||||||
|
showHex : Int -> String
|
||||||
|
showHex x =
|
||||||
|
if x < 0 then "-" ++ Nat.showHex (cast (-x)) else Nat.showHex (cast x)
|
||||||
|
|
||||||
|
|
||||||
|
namespace Int
|
||||||
|
export
|
||||||
|
fromHexit : Char -> Maybe Int
|
||||||
|
fromHexit c =
|
||||||
|
if c >= '0' && c <= '9' then Just $ ord c - ord '0'
|
||||||
|
else if c >= 'a' && c <= 'f' then Just $ ord c - ord 'a' + 10
|
||||||
|
else if c >= 'A' && c <= 'F' then Just $ ord c - ord 'A' + 10
|
||||||
|
else Nothing
|
||||||
|
|
||||||
|
private
|
||||||
|
fromHex' : Int -> String -> Maybe Int
|
||||||
|
fromHex' acc str = case strM str of
|
||||||
|
StrNil => Just acc
|
||||||
|
StrCons c cs => fromHex' (16 * acc + !(fromHexit c)) (assert_smaller str cs)
|
||||||
|
|
||||||
|
export %inline
|
||||||
|
fromHex : String -> Maybe Int
|
||||||
|
fromHex = fromHex' 0
|
||||||
|
|
||||||
|
namespace Nat
|
||||||
|
export
|
||||||
|
fromHexit : Char -> Maybe Nat
|
||||||
|
fromHexit = map cast . Int.fromHexit
|
||||||
|
|
||||||
|
export %inline
|
||||||
|
fromHex : String -> Maybe Nat
|
||||||
|
fromHex = map cast . Int.fromHex
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
module Quox.Parser.Lexer
|
module Quox.Parser.Lexer
|
||||||
|
|
||||||
import Quox.CharExtra
|
import Quox.CharExtra
|
||||||
|
import Quox.NatExtra
|
||||||
import Quox.Name
|
import Quox.Name
|
||||||
import Data.String.Extra
|
import Data.String.Extra
|
||||||
import Data.SortedMap
|
import Data.SortedMap
|
||||||
|
@ -72,29 +73,84 @@ tmatch t f = match t (T . f)
|
||||||
|
|
||||||
|
|
||||||
export
|
export
|
||||||
fromStringLit : String -> String
|
fromStringLit : (String -> Token) -> String -> ExtToken
|
||||||
fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
|
fromStringLit f str =
|
||||||
go : List Char -> List Char
|
case go $ unpack $ drop 1 $ dropLast 1 str of
|
||||||
go [] = []
|
Left err => Invalid err str
|
||||||
go ['\\'] = ['\\'] -- i guess???
|
Right ok => T $ f $ pack ok
|
||||||
go ('\\' :: 'n' :: cs) = '\n' :: go cs
|
where
|
||||||
go ('\\' :: 't' :: cs) = '\t' :: go cs
|
Interpolation Char where interpolate = singleton
|
||||||
|
|
||||||
|
go, hexEscape : List Char -> Either String (List Char)
|
||||||
|
|
||||||
|
go [] = Right []
|
||||||
|
go ['\\'] = Left "string ends with \\"
|
||||||
|
go ('\\' :: 'n' :: cs) = ('\n' ::) <$> go cs
|
||||||
|
go ('\\' :: 't' :: cs) = ('\t' ::) <$> go cs
|
||||||
|
go ('\\' :: 'x' :: cs) = hexEscape cs
|
||||||
|
go ('\\' :: 'X' :: cs) = hexEscape cs
|
||||||
|
go ('\\' :: '\\' :: cs) = ('\\' ::) <$> go cs
|
||||||
|
go ('\\' :: '"' :: cs) = ('"' ::) <$> go cs
|
||||||
-- [todo] others
|
-- [todo] others
|
||||||
go ('\\' :: c :: cs) = c :: go cs
|
go ('\\' :: c :: _) = Left "unknown escape '\{c}'"
|
||||||
go (c :: cs) = c :: go cs
|
go (c :: cs) = (c ::) <$> go cs
|
||||||
|
|
||||||
|
hexEscape cs =
|
||||||
|
case break (== ';') cs of
|
||||||
|
(hs, ';' :: rest) => do
|
||||||
|
let hs = pack hs
|
||||||
|
let Just c = Int.fromHex hs
|
||||||
|
| Nothing => Left #"invalid hex string "\#{hs}" in escape"#
|
||||||
|
if isCodepoint c
|
||||||
|
then (chr c ::) <$> go (assert_smaller cs rest)
|
||||||
|
else Left "codepoint \{hs} out of range"
|
||||||
|
_ => Left "unterminated hex escape"
|
||||||
|
|
||||||
private
|
private
|
||||||
string : Tokenizer ExtToken
|
string : Tokenizer ExtToken
|
||||||
string = tmatch stringLit (Str . fromStringLit)
|
string = match stringLit $ fromStringLit Str
|
||||||
|
|
||||||
|
|
||||||
|
%hide binLit
|
||||||
|
%hide octLit
|
||||||
|
%hide hexLit
|
||||||
|
|
||||||
private
|
private
|
||||||
nat : Tokenizer ExtToken
|
nat : Tokenizer ExtToken
|
||||||
nat = tmatch (some (range '0' '9')) (Nat . cast)
|
nat = match hexLit fromHexLit
|
||||||
|
<|> tmatch decLit fromDecLit
|
||||||
|
where
|
||||||
|
withUnderscores : Lexer -> Lexer
|
||||||
|
withUnderscores l = l <+> many (opt (is '_') <+> l)
|
||||||
|
|
||||||
|
withoutUnderscores : String -> String
|
||||||
|
withoutUnderscores = pack . go . unpack where
|
||||||
|
go : List Char -> List Char
|
||||||
|
go [] = []
|
||||||
|
go ('_' :: cs) = go cs
|
||||||
|
go (c :: cs) = c :: go cs
|
||||||
|
|
||||||
|
decLit =
|
||||||
|
withUnderscores (range '0' '9') <+> reject idContEnd
|
||||||
|
|
||||||
|
hexLit =
|
||||||
|
approx "0x" <+>
|
||||||
|
withUnderscores (range '0' '9' <|> range 'a' 'f' <|> range 'A' 'F') <+>
|
||||||
|
reject idContEnd
|
||||||
|
|
||||||
|
fromDecLit : String -> Token
|
||||||
|
fromDecLit = Nat . cast . withoutUnderscores
|
||||||
|
|
||||||
|
fromHexLit : String -> ExtToken
|
||||||
|
fromHexLit str =
|
||||||
|
maybe (Invalid "invalid hex sequence" str) (T . Nat) $
|
||||||
|
fromHex $ withoutUnderscores $ drop 2 str
|
||||||
|
|
||||||
|
|
||||||
private
|
private
|
||||||
tag : Tokenizer ExtToken
|
tag : Tokenizer ExtToken
|
||||||
tag = tmatch (is '\'' <+> name) (Tag . drop 1)
|
tag = tmatch (is '\'' <+> name) (Tag . drop 1)
|
||||||
<|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
|
<|> match (is '\'' <+> stringLit) (fromStringLit Tag . drop 1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import Quox.Pretty
|
||||||
|
|
||||||
import Quox.EffExtra
|
import Quox.EffExtra
|
||||||
import Quox.CharExtra
|
import Quox.CharExtra
|
||||||
|
import Quox.NatExtra
|
||||||
import Data.DPair
|
import Data.DPair
|
||||||
import Data.List1
|
import Data.List1
|
||||||
import Data.String
|
import Data.String
|
||||||
|
@ -278,27 +279,15 @@ prelude = """
|
||||||
;;;;;;
|
;;;;;;
|
||||||
"""
|
"""
|
||||||
|
|
||||||
export
|
|
||||||
toHex : Int -> String
|
|
||||||
toHex x =
|
|
||||||
case toHex' x of
|
|
||||||
[<] => "0"
|
|
||||||
cs => concatMap singleton cs
|
|
||||||
where
|
|
||||||
toHex' : Int -> SnocList Char
|
|
||||||
toHex' x =
|
|
||||||
if x == 0 then [<] else
|
|
||||||
let d = x `div` 16
|
|
||||||
m = x `mod` 16 in
|
|
||||||
toHex' (assert_smaller x d) :<
|
|
||||||
assert_total strIndex "0123456789abcdef" m
|
|
||||||
|
|
||||||
export
|
export
|
||||||
escape : String -> String
|
escape : String -> String
|
||||||
escape = concatMap esc1 . unpack where
|
escape = foldMap esc1 . unpack where
|
||||||
esc1 : Char -> String
|
esc1 : Char -> String
|
||||||
esc1 c = if c < ' ' || c > '~' || c == '\\' || c == '"'
|
esc1 c =
|
||||||
then "\\x" ++ toHex (ord c) ++ ";"
|
if c == '\\' || c == '"' then
|
||||||
|
"\\" ++ singleton c
|
||||||
|
else if c < ' ' || c > '~' then
|
||||||
|
"\\x" ++ showHex (ord c) ++ ";"
|
||||||
else singleton c
|
else singleton c
|
||||||
|
|
||||||
export covering
|
export covering
|
||||||
|
|
|
@ -153,10 +153,26 @@ tests = "lexer" :- [
|
||||||
lexes #" "\"" "# [Str "\""],
|
lexes #" "\"" "# [Str "\""],
|
||||||
lexes #" "\\" "# [Str "\\"],
|
lexes #" "\\" "# [Str "\\"],
|
||||||
lexes #" "\\\"" "# [Str "\\\""],
|
lexes #" "\\\"" "# [Str "\\\""],
|
||||||
todo "other escapes"
|
lexes #" "\t" "# [Str "\t"],
|
||||||
|
lexes #" "\n" "# [Str "\n"],
|
||||||
|
lexes #" "🐉" "# [Str "🐉"],
|
||||||
|
lexes #" "\x1f409;" "# [Str "🐉"],
|
||||||
|
lexFail #" "\q" "#,
|
||||||
|
lexFail #" "\" "#
|
||||||
],
|
],
|
||||||
|
|
||||||
todo "naturals",
|
"naturals" :- [
|
||||||
|
lexes "0" [Nat 0],
|
||||||
|
lexes "123" [Nat 123],
|
||||||
|
lexes "69_420" [Nat 69420],
|
||||||
|
lexes "0x123" [Nat 0x123],
|
||||||
|
lexes "0xbeef" [Nat 0xbeef],
|
||||||
|
lexes "0xBEEF" [Nat 0xBEEF],
|
||||||
|
lexes "0XBEEF" [Nat 0xBEEF],
|
||||||
|
lexes "0xbaba_baba" [Nat 0xbabababa],
|
||||||
|
lexFail "123abc",
|
||||||
|
lexFail "0x123abcghi"
|
||||||
|
],
|
||||||
|
|
||||||
"universes" :- [
|
"universes" :- [
|
||||||
lexes "Type0" [TYPE 0],
|
lexes "Type0" [TYPE 0],
|
||||||
|
|
Loading…
Reference in a new issue