diff --git a/lib/Quox/Lexer.idr b/lib/Quox/Lexer.idr
index 28dff6c..8144571 100644
--- a/lib/Quox/Lexer.idr
+++ b/lib/Quox/Lexer.idr
@@ -16,14 +16,20 @@ import Derive.Prelude
 
 %language ElabReflection
 
-||| @ R reserved token
-||| @ I identifier
-||| @ N nat literal
-||| @ S string literal
-||| @ T tag literal
+||| @ Reserved reserved token
+||| @ Name name, possibly qualified
+||| @ Nat nat literal
+||| @ Str string literal
+||| @ Tag tag literal
 ||| @ TYPE "Type" or "★" with subscript
 public export
-data Token = R String | I Name | N Nat | S String | T String | TYPE Nat
+data Token =
+    Reserved String
+  | Name Name
+  | Nat Nat
+  | Str String
+  | Tag String
+  | TYPE Nat
 %runElab derive "Token" [Eq, Ord, Show]
 
 -- token or whitespace
@@ -85,7 +91,7 @@ nameL = baseNameL <+> many (is '.' <+> baseNameL)
 
 private
 name : Tokenizer TokenW
-name = match nameL $ I . fromList . split (== '.') . normalizeNfc
+name = match nameL $ Name . fromList . split (== '.') . normalizeNfc
 
 ||| [todo] escapes other than `\"` and (accidentally) `\\`
 export
@@ -99,16 +105,16 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
 
 private
 string : Tokenizer TokenW
-string = match stringLit (S . fromStringLit)
+string = match stringLit (Str . fromStringLit)
 
 private
 nat : Tokenizer TokenW
-nat = match (some (range '0' '9')) (N . cast)
+nat = match (some (range '0' '9')) (Nat . cast)
 
 private
 tag : Tokenizer TokenW
-tag = match (is '\'' <+> nameL) (T . drop 1)
-  <|> match (is '\'' <+> stringLit) (T . fromStringLit . drop 1)
+tag = match (is '\'' <+> nameL) (Tag . drop 1)
+  <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
 
 
 
@@ -140,89 +146,91 @@ choice : (xs : List (Tokenizer a)) -> (0 _ : NonEmpty xs) => Tokenizer a
 choice (t :: ts) = foldl (\a, b => a <|> b) t ts
 
 
-namespace Res
+namespace Reserved
   ||| description of a reserved symbol
-  ||| @ W a reserved word (must not be followed by letters, digits, etc)
-  ||| @ S a reserved symbol (must not be followed by symbolic chars)
-  ||| @ X a character that doesn't show up in names (brackets, etc)
+  ||| @ Word a reserved word (must not be followed by letters, digits, etc)
+  ||| @ Sym a reserved symbol (must not be followed by symbolic chars)
+  ||| @ Punc a character that doesn't show up in names (brackets, etc)
   public export
-  data Res1 = W String | S String | X Char
-  %runElab derive "Res1" [Eq, Ord, Show]
+  data Reserved1 = Word String | Sym String | Punc Char
+  %runElab derive "Reserved1" [Eq, Ord, Show]
 
   ||| description of a token that might have unicode & ascii-only aliases
  public export
-  data Res = Only Res1 | Or Res1 Res1
-  %runElab derive "Res" [Eq, Ord, Show]
+  data Reserved = Only Reserved1 | Or Reserved1 Reserved1
+  %runElab derive "Reserved" [Eq, Ord, Show]
 
   public export
-  S1, W1 : String -> Res
-  S1 = Only . S
-  W1 = Only . W
+  Sym1, Word1 : String -> Reserved
+  Sym1 = Only . Sym
+  Word1 = Only . Word
 
   public export
-  X1 : Char -> Res
-  X1 = Only . X
+  Punc1 : Char -> Reserved
+  Punc1 = Only . Punc
 
 public export
-resString1 : Res1 -> String
-resString1 (X x) = singleton x
-resString1 (W w) = w
-resString1 (S s) = s
+resString1 : Reserved1 -> String
+resString1 (Punc x) = singleton x
+resString1 (Word w) = w
+resString1 (Sym s) = s
 
 ||| return the representative string for a token description. if there are
 ||| two, then it's the first one, which should be the full-unicode one
 public export
-resString : Res -> String
+resString : Reserved -> String
 resString (Only r) = resString1 r
 resString (r `Or` _) = resString1 r
 
 private
-resTokenizer1 : Res1 -> String -> Tokenizer TokenW
+resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW
 resTokenizer1 r str =
-  let res : String -> Token := const $ R str in
-  case r of W w => match (exact w <+> reject idContEnd) res
-            S s => match (exact s <+> reject symCont) res
-            X x => match (is x) res
+  let res : String -> Token := const $ Reserved str in
+  case r of Word w => match (exact w <+> reject idContEnd) res
+            Sym s => match (exact s <+> reject symCont) res
+            Punc x => match (is x) res
 
 ||| match a reserved token
 export
-resTokenizer : Res -> Tokenizer TokenW
+resTokenizer : Reserved -> Tokenizer TokenW
 resTokenizer (Only r) = resTokenizer1 r (resString1 r)
 resTokenizer (r `Or` s) =
   resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
 
 ||| reserved words & symbols.
-||| the tokens recognised by ``a `Or` b`` will be `R a`. e.g. `=>` in the input
-||| (if not part of a longer name) will be returned as `R "⇒"`.
+||| the tokens recognised by ``a `Or` b`` will be `Reserved a`.
+||| e.g. `=>` in the input (if not part of a longer name)
+||| will be returned as `Reserved "⇒"`.
 public export
-reserved : List Res
+reserved : List Reserved
 reserved =
-  [X1 '(', X1 ')', X1 '[', X1 ']', X1 '{', X1 '}', X1 ',', X1 ';',
-   S1 "@",
-   S1 ":",
-   S "⇒" `Or` S "=>",
-   S "→" `Or` S "->",
-   S "×" `Or` S "**",
-   S "≡" `Or` S "==",
-   S "∷" `Or` S "::",
-   S "·" `Or` X '.',
-   W1 "case",
-   W1 "case1",
-   W "caseω" `Or` W "case#",
-   W1 "return",
-   W1 "of",
-   W1 "_",
-   W1 "Eq",
-   W "λ" `Or` W "fun",
-   W "δ" `Or` W "dfun",
-   W "ω" `Or` S "#",
-   S "★" `Or` W "Type",
-   W1 "def",
-   W1 "def0",
-   W "defω" `Or` W "def#",
-   S "≔" `Or` S ":="]
+  [Punc1 '(', Punc1 ')', Punc1 '[', Punc1 ']', Punc1 '{', Punc1 '}',
+   Punc1 ',', Punc1 ';',
+   Sym1 "@",
+   Sym1 ":",
+   Sym "⇒" `Or` Sym "=>",
+   Sym "→" `Or` Sym "->",
+   Sym "×" `Or` Sym "**",
+   Sym "≡" `Or` Sym "==",
+   Sym "∷" `Or` Sym "::",
+   Sym "·" `Or` Punc '.',
+   Word1 "case",
+   Word1 "case1",
+   Word "caseω" `Or` Word "case#",
+   Word1 "return",
+   Word1 "of",
+   Word1 "_",
+   Word1 "Eq",
+   Word "λ" `Or` Word "fun",
+   Word "δ" `Or` Word "dfun",
+   Word "ω" `Or` Sym "#",
+   Sym "★" `Or` Word "Type",
+   Word1 "def",
+   Word1 "def0",
+   Word "defω" `Or` Word "def#",
+   Sym "≔" `Or` Sym ":="]
 
-||| `IsReserved str` is true if `R str` might actually show up in
+||| `IsReserved str` is true if `Reserved str` might actually show up in
 ||| the token stream
 public export
 IsReserved : String -> Type
diff --git a/lib/Quox/Parser.idr b/lib/Quox/Parser.idr
index 42a1f4d..9276178 100644
--- a/lib/Quox/Parser.idr
+++ b/lib/Quox/Parser.idr
@@ -19,7 +19,7 @@ Grammar = Core.Grammar () Token
 export
 res : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
 res str = terminal "expecting \"\{str}\"" $
-  \x => guard $ x == R str
+  \x => guard $ x == Reserved str
 
 export
 resC : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
@@ -65,28 +65,28 @@ darr = resC "⇒"
 export
 name : Grammar True Name
 name = terminal "expecting name" $
-  \case I i => Just i; _ => Nothing
+  \case Name i => Just i; _ => Nothing
 
 export
 baseName : Grammar True BaseName
 baseName = terminal "expecting unqualified name" $
-  \case I i => guard (null i.mods) $> i.base
-        _ => Nothing
+  \case Name i => guard (null i.mods) $> i.base
+        _ => Nothing
 
 export
 nat : Grammar True Nat
 nat = terminal "expecting natural number" $
-  \case N n => pure n; _ => Nothing
+  \case Nat n => pure n; _ => Nothing
 
 export
 string : Grammar True String
 string = terminal "expecting string literal" $
-  \case S s => pure s; _ => Nothing
+  \case Str s => pure s; _ => Nothing
 
 export
 tag : Grammar True String
 tag = terminal "expecting tag constructor" $
-  \case T t => pure t; _ => Nothing
+  \case Tag t => pure t; _ => Nothing
 
 export
 bareTag : Grammar True String
@@ -105,7 +105,7 @@ bname = Nothing <$ res "_"
 export
 zeroOne : (zero, one : a) -> Grammar True a
 zeroOne zero one = terminal "expecting zero or one" $
-  \case N 0 => Just zero; N 1 => Just one; _ => Nothing
+  \case Nat 0 => Just zero; Nat 1 => Just one; _ => Nothing
 
 
 export covering
@@ -230,14 +230,17 @@ mutual
         <|> resC "Eq" *> [|Eq (bracks optBinderTerm) aTerm aTerm|]
        <|> [|apply aTerm (many appArg)|]
   where
-    data PArg = T PTerm | D PDim
+    data PArg = TermArg PTerm | DimArg PDim
 
     appArg : Grammar True PArg
-    appArg = [|D $ resC "@" *> dim|]
-         <|> [|T aTerm|]
+    appArg = [|DimArg $ resC "@" *> dim|]
+         <|> [|TermArg aTerm|]
 
     apply : PTerm -> List PArg -> PTerm
-    apply = foldl $ \f, x => case x of T x => f :@ x; D p => f :% p
+    apply = foldl apply1 where
+      apply1 : PTerm -> PArg -> PTerm
+      apply1 f (TermArg x) = f :@ x
+      apply1 f (DimArg p) = f :% p
 
   private covering
   aTerm : Grammar True PTerm
diff --git a/tests/Tests/Lexer.idr b/tests/Tests/Lexer.idr
index 0a1a105..4fbde87 100644
--- a/tests/Tests/Lexer.idr
+++ b/tests/Tests/Lexer.idr
@@ -46,97 +46,104 @@ tests = "lexer" :- [
     lexes "" [],
     lexes " " [],
     lexes "-- line comment" [],
-    lexes "name -- line comment" [I "name"],
-    lexes "-- line comment\nnameBetween -- and another" [I "nameBetween"],
+    lexes "name -- line comment" [Name "name"],
+    lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"],
     lexes "{- block comment -}" [],
     lexes "{- {- nested -} block comment -}" []
   ],
 
   "identifiers & keywords" :- [
-    lexes "abc" [I "abc"],
-    lexes "abc def" [I "abc", R "def"],
-    lexes "abc_def" [I "abc_def"],
-    lexes "abc-def" [I "abc-def"],
-    lexes "abc{-comment-}def" [I "abc", R "def"],
-    lexes "λ" [R "λ"],
-    lexes "fun" [R "λ"],
-    lexes "δ" [R "δ"],
-    lexes "dfun" [R "δ"],
-    lexes "funky" [I "funky"],
-    lexes "δελτα" [I "δελτα"],
-    lexes "★★" [I "★★"],
-    lexes "Types" [I "Types"],
-    lexes "a.b.c.d.e" [I $ MakeName [< "a","b","c","d"] "e"],
-    lexes "normalïse" [I "normalïse"],
-    lexes "map#" [I "map#"],
-    lexes "write!" [I "write!"],
-    lexes "uhh??!?!?!?" [I "uhh??!?!?!?"],
+    lexes "abc" [Name "abc"],
+    lexes "abc def" [Name "abc", Reserved "def"],
+    lexes "abc_def" [Name "abc_def"],
+    lexes "abc-def" [Name "abc-def"],
+    lexes "abc{-comment-}def" [Name "abc", Reserved "def"],
+    lexes "λ" [Reserved "λ"],
+    lexes "fun" [Reserved "λ"],
+    lexes "δ" [Reserved "δ"],
+    lexes "dfun" [Reserved "δ"],
+    lexes "funky" [Name "funky"],
+    lexes "δελτα" [Name "δελτα"],
+    lexes "★★" [Name "★★"],
+    lexes "Types" [Name "Types"],
+    lexes "a.b.c.d.e" [Name $ MakeName [< "a","b","c","d"] "e"],
+    lexes "normalïse" [Name "normalïse"],
+    -- ↑ replace i + combining ¨ with precomposed ï
+    lexes "map#" [Name "map#"],
+    lexes "write!" [Name "write!"],
+    lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
 
     todo "check for reserved words in a qname",
-    -- lexes "abc.fun.def" [I "abc", R ".", R "λ", R ".", I "def"],
+    {-
+    lexes "abc.fun.def"
+      [Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
+    -}
 
-    lexes "+" [I "+"],
-    lexes "*" [I "*"],
-    lexes "**" [R "×"],
-    lexes "***" [I "***"],
-    lexes "+**" [I "+**"],
-    lexes "+#" [I "+#"],
-    lexes "+.+.+" [I $ MakeName [< "+", "+"] "+"],
-    lexes "a.+" [I $ MakeName [< "a"] "+"],
-    lexes "+.a" [I $ MakeName [< "+"] "a"],
+    lexes "+" [Name "+"],
+    lexes "*" [Name "*"],
+    lexes "**" [Reserved "×"],
+    lexes "***" [Name "***"],
+    lexes "+**" [Name "+**"],
+    lexes "+#" [Name "+#"],
+    lexes "+.+.+" [Name $ MakeName [< "+", "+"] "+"],
+    lexes "a.+" [Name $ MakeName [< "a"] "+"],
+    lexes "+.a" [Name $ MakeName [< "+"] "a"],
 
-    lexes "+a" [I "+", I "a"],
+    lexes "+a" [Name "+", Name "a"],
 
-    lexes "x." [I "x", R "·"],
-    lexes "&." [I "&", R "·"],
-    lexes ".x" [R "·", I "x"],
-    lexes "a.b.c." [I $ MakeName [< "a", "b"] "c", R "·"],
+    lexes "x." [Name "x", Reserved "·"],
+    lexes "&." [Name "&", Reserved "·"],
+    lexes ".x" [Reserved "·", Name "x"],
+    lexes "a.b.c." [Name $ MakeName [< "a", "b"] "c", Reserved "·"],
 
-    lexes "case" [R "case"],
-    lexes "caseω" [R "caseω"],
-    lexes "case#" [R "caseω"],
-    lexes "case1" [R "case1"],
-    lexes "case0" [I "case0"],
-    lexes "case##" [I "case##"],
+    lexes "case" [Reserved "case"],
+    lexes "caseω" [Reserved "caseω"],
+    lexes "case#" [Reserved "caseω"],
+    lexes "case1" [Reserved "case1"],
+    lexes "case0" [Name "case0"],
+    lexes "case##" [Name "case##"],
 
-    lexes "_" [R "_"],
-    lexes "_a" [I "_a"],
-    lexes "a_" [I "a_"],
+    lexes "_" [Reserved "_"],
+    lexes "_a" [Name "_a"],
+    lexes "a_" [Name "a_"],
 
-    lexes "a'" [I "a'"],
-    lexes "+'" [I "+'"]
+    lexes "a'" [Name "a'"],
+    lexes "+'" [Name "+'"]
   ],
 
   "syntax characters" :- [
-    lexes "()" [R "(", R ")"],
-    lexes "(a)" [R "(", I "a", R ")"],
-    lexes "(^)" [R "(", I "^", R ")"],
-    lexes "{a,b}" [R "{", I "a", R ",", I "b", R "}"],
-    lexes "{+,-}" [R "{", I "+", R ",", I "-", R "}"]
+    lexes "()" [Reserved "(", Reserved ")"],
+    lexes "(a)" [Reserved "(", Name "a", Reserved ")"],
+    lexes "(^)" [Reserved "(", Name "^", Reserved ")"],
+    lexes "{a,b}"
+      [Reserved "{", Name "a", Reserved ",", Name "b", Reserved "}"],
+    lexes "{+,-}"
+      [Reserved "{", Name "+", Reserved ",", Name "-", Reserved "}"]
   ],
 
   "tags" :- [
-    lexes #" 'a "# [T "a"],
-    lexes #" 'abc "# [T "abc"],
-    lexes #" '+ "# [T "+"],
-    lexes #" '$$$ "# [T "$$$"],
-    lexes #" 'tag.with.dots "# [T "tag.with.dots"],
-    lexes #" '"multi word tag" "# [T "multi word tag"],
-    lexes #" '"" "# [T ""],
+    lexes #" 'a "# [Tag "a"],
+    lexes #" 'abc "# [Tag "abc"],
+    lexes #" '+ "# [Tag "+"],
+    lexes #" '$$$ "# [Tag "$$$"],
+    lexes #" 'tag.with.dots "# [Tag "tag.with.dots"],
+    lexes #" '"multi word tag" "# [Tag "multi word tag"],
+    lexes #" '"" "# [Tag ""],
 
     lexFail #" ' "#,
     lexFail #" '' "#
   ],
 
  "strings" :- [
-    lexes #" "" "# [S ""],
-    lexes #" "abc" "# [S "abc"],
-    lexes #" "\"" "# [S "\""],
-    lexes #" "\\" "# [S "\\"],
-    lexes #" "\\\"" "# [S "\\\""],
+    lexes #" "" "# [Str ""],
+    lexes #" "abc" "# [Str "abc"],
+    lexes #" "\"" "# [Str "\""],
+    lexes #" "\\" "# [Str "\\"],
+    lexes #" "\\\"" "# [Str "\\\""],
     todo "other escapes"
   ],
 
+  todo "naturals",
+
   "universes" :- [
     lexes "Type0" [TYPE 0],
     lexes "Type₀" [TYPE 0],
@@ -144,7 +151,7 @@ tests = "lexer" :- [
     lexes "★₀" [TYPE 0],
     lexes "★₆₉" [TYPE 69],
     lexes "★4" [TYPE 4],
-    lexes "Type" [R "★"],
-    lexes "★" [R "★"]
+    lexes "Type" [Reserved "★"],
+    lexes "★" [Reserved "★"]
   ]
 ]