change some single-character constructor names

2023-03-08 16:46:29 +01:00 · 2023-03-08 16:46:29 +01:00 · 88985405ce
commit 88985405ce
parent 47fca359f4
3 changed files with 158 additions and 140 deletions
--- a/lib/Quox/Lexer.idr
+++ b/lib/Quox/Lexer.idr
@ -16,14 +16,20 @@ import Derive.Prelude
 %language ElabReflection


-||| @ R reserved token
-||| @ I identifier
-||| @ N nat literal
-||| @ S string literal
-||| @ T tag literal
+||| @ Reserved reserved token
+||| @ Name name, possibly qualified
+||| @ Nat nat literal
+||| @ String string literal
+||| @ Tag tag literal
 ||| @ TYPE "Type" or "★" with subscript
 public export
-data Token = R String | I Name | N Nat | S String | T String | TYPE Nat
+data Token =
+    Reserved String
+  | Name Name
+  | Nat Nat
+  | Str String
+  | Tag String
+  | TYPE Nat
 %runElab derive "Token" [Eq, Ord, Show]

 -- token or whitespace
@ -85,7 +91,7 @@ nameL = baseNameL <+> many (is '.' <+> baseNameL)

 private
 name : Tokenizer TokenW
-name = match nameL $ I . fromList . split (== '.') . normalizeNfc
+name = match nameL $ Name . fromList . split (== '.') . normalizeNfc

 ||| [todo] escapes other than `\"` and (accidentally) `\\`
 export
@ -99,16 +105,16 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where

 private
 string : Tokenizer TokenW
-string = match stringLit (S . fromStringLit)
+string = match stringLit (Str . fromStringLit)

 private
 nat : Tokenizer TokenW
-nat = match (some (range '0' '9')) (N . cast)
+nat = match (some (range '0' '9')) (Nat . cast)

 private
 tag : Tokenizer TokenW
-tag = match (is '\'' <+> nameL)     (T . drop 1)
-  <|> match (is '\'' <+> stringLit) (T . fromStringLit . drop 1)
+tag = match (is '\'' <+> nameL)     (Tag . drop 1)
+  <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)



@ -140,89 +146,91 @@ choice : (xs : List (Tokenizer a)) -> (0 _ : NonEmpty xs) => Tokenizer a
 choice (t :: ts) = foldl (\a, b => a <|> b) t ts


-namespace Res
+namespace Reserved
  ||| description of a reserved symbol
-  ||| @ W a reserved word (must not be followed by letters, digits, etc)
-  ||| @ S a reserved symbol (must not be followed by symbolic chars)
-  ||| @ X a character that doesn't show up in names (brackets, etc)
+  ||| @ Word a reserved word (must not be followed by letters, digits, etc)
+  ||| @ Sym a reserved symbol (must not be followed by symbolic chars)
+  ||| @ Punc a character that doesn't show up in names (brackets, etc)
  public export
-  data Res1 = W String | S String | X Char
-  %runElab derive "Res1" [Eq, Ord, Show]
+  data Reserved1 = Word String | Sym String | Punc Char
+  %runElab derive "Reserved1" [Eq, Ord, Show]

  ||| description of a token that might have unicode & ascii-only aliases
  public export
-  data Res = Only Res1 | Or Res1 Res1
-  %runElab derive "Res" [Eq, Ord, Show]
+  data Reserved = Only Reserved1 | Or Reserved1 Reserved1
+  %runElab derive "Reserved" [Eq, Ord, Show]

  public export
-  S1, W1 : String -> Res
-  S1 = Only . S
-  W1 = Only . W
+  Sym1, Word1 : String -> Reserved
+  Sym1 = Only . Sym
+  Word1 = Only . Word

  public export
-  X1 : Char -> Res
-  X1 = Only . X
+  Punc1 : Char -> Reserved
+  Punc1 = Only . Punc

 public export
-resString1 : Res1 -> String
-resString1 (X x) = singleton x
-resString1 (W w) = w
-resString1 (S s) = s
+resString1 : Reserved1 -> String
+resString1 (Punc x) = singleton x
+resString1 (Word w) = w
+resString1 (Sym  s) = s

 ||| return the representative string for a token description. if there are
 ||| two, then it's the first one, which should be the full-unicode one
 public export
-resString : Res -> String
+resString : Reserved -> String
 resString (Only r)   = resString1 r
 resString (r `Or` _) = resString1 r

 private
-resTokenizer1 : Res1 -> String -> Tokenizer TokenW
+resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW
 resTokenizer1 r str =
-  let res : String -> Token := const $ R str in
-  case r of W w => match (exact w <+> reject idContEnd) res
-            S s => match (exact s <+> reject symCont)   res
-            X x => match (is x)                         res
+  let res : String -> Token := const $ Reserved str in
+  case r of Word w => match (exact w <+> reject idContEnd) res
+            Sym s  => match (exact s <+> reject symCont)   res
+            Punc x => match (is x)                         res

 ||| match a reserved token
 export
-resTokenizer : Res -> Tokenizer TokenW
+resTokenizer : Reserved -> Tokenizer TokenW
 resTokenizer (Only r)   = resTokenizer1 r (resString1 r)
 resTokenizer (r `Or` s) =
  resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)

 ||| reserved words & symbols.
-||| the tokens recognised by ``a `Or` b`` will be `R a`. e.g. `=>` in the input
-||| (if not part of a longer name) will be returned as `R "⇒"`.
+||| the tokens recognised by ``a `Or` b`` will be `Reserved a`.
+||| e.g. `=>` in the input (if not part of a longer name)
+||| will be returned as `Reserved "⇒"`.
 public export
-reserved : List Res
+reserved : List Reserved
 reserved =
-  [X1 '(', X1 ')', X1 '[', X1 ']', X1 '{', X1 '}', X1 ',', X1 ';',
-   S1 "@",
-   S1 ":",
-   S "⇒" `Or` S "=>",
-   S "→" `Or` S "->",
-   S "×" `Or` S "**",
-   S "≡" `Or` S "==",
-   S "∷" `Or` S "::",
-   S "·" `Or` X '.',
-   W1 "case",
-   W1 "case1",
-   W "caseω" `Or` W "case#",
-   W1 "return",
-   W1 "of",
-   W1 "_",
-   W1 "Eq",
-   W "λ" `Or` W "fun",
-   W "δ" `Or` W "dfun",
-   W "ω" `Or` S "#",
-   S "★" `Or` W "Type",
-   W1 "def",
-   W1 "def0",
-   W "defω" `Or` W "def#",
-   S "≔" `Or` S ":="]
+  [Punc1 '(', Punc1 ')', Punc1 '[', Punc1 ']', Punc1 '{', Punc1 '}',
+   Punc1 ',', Punc1 ';',
+   Sym1 "@",
+   Sym1 ":",
+   Sym "⇒" `Or` Sym "=>",
+   Sym "→" `Or` Sym "->",
+   Sym "×" `Or` Sym "**",
+   Sym "≡" `Or` Sym "==",
+   Sym "∷" `Or` Sym "::",
+   Sym "·" `Or` Punc '.',
+   Word1 "case",
+   Word1 "case1",
+   Word "caseω" `Or` Word "case#",
+   Word1 "return",
+   Word1 "of",
+   Word1 "_",
+   Word1 "Eq",
+   Word "λ" `Or` Word "fun",
+   Word "δ" `Or` Word "dfun",
+   Word "ω" `Or` Sym "#",
+   Sym "★" `Or` Word "Type",
+   Word1 "def",
+   Word1 "def0",
+   Word "defω" `Or` Word "def#",
+   Sym "≔" `Or` Sym ":="]

-||| `IsReserved str` is true if `R str` might actually show up in
+||| `IsReserved str` is true if `Reserved str` might actually show up in
 ||| the token stream
 public export
 IsReserved : String -> Type
--- a/lib/Quox/Parser.idr
+++ b/lib/Quox/Parser.idr
@ -19,7 +19,7 @@ Grammar = Core.Grammar () Token
 export
 res : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
 res str = terminal "expecting \"\{str}\"" $
-  \x => guard $ x == R str
+  \x => guard $ x == Reserved str

 export
 resC : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
@ -65,28 +65,28 @@ darr = resC "⇒"
 export
 name : Grammar True Name
 name = terminal "expecting name" $
-  \case I i => Just i; _ => Nothing
+  \case Name i => Just i; _ => Nothing

 export
 baseName : Grammar True BaseName
 baseName = terminal "expecting unqualified name" $
-  \case I i => guard (null i.mods) $> i.base
-        _   => Nothing
+  \case Name i => guard (null i.mods) $> i.base
+        _      => Nothing

 export
 nat : Grammar True Nat
 nat = terminal "expecting natural number" $
-  \case N n => pure n; _ => Nothing
+  \case Nat n => pure n; _ => Nothing

 export
 string : Grammar True String
 string = terminal "expecting string literal" $
-  \case S s => pure s; _ => Nothing
+  \case Str s => pure s; _ => Nothing

 export
 tag : Grammar True String
 tag = terminal "expecting tag constructor" $
-  \case T t => pure t; _ => Nothing
+  \case Tag t => pure t; _ => Nothing

 export
 bareTag : Grammar True String
@ -105,7 +105,7 @@ bname = Nothing <$ res "_"
 export
 zeroOne : (zero, one : a) -> Grammar True a
 zeroOne zero one = terminal "expecting zero or one" $
-  \case N 0 => Just zero; N 1 => Just one; _ => Nothing
+  \case Nat 0 => Just zero; Nat 1 => Just one; _ => Nothing


 export covering
@ -230,14 +230,17 @@ mutual
        <|> resC "Eq" *> [|Eq (bracks optBinderTerm) aTerm aTerm|]
        <|> [|apply aTerm (many appArg)|]
  where
-    data PArg = T PTerm | D PDim
+    data PArg = TermArg PTerm | DimArg PDim

    appArg : Grammar True PArg
-    appArg = [|D $ resC "@" *> dim|]
-         <|> [|T aTerm|]
+    appArg = [|DimArg $ resC "@" *> dim|]
+         <|> [|TermArg aTerm|]

    apply : PTerm -> List PArg -> PTerm
-    apply = foldl $ \f, x => case x of T x => f :@ x; D p => f :% p
+    apply = foldl apply1 where
+      apply1 : PTerm -> PArg -> PTerm
+      apply1 f (TermArg x) = f :@ x
+      apply1 f (DimArg  p) = f :% p

  private covering
  aTerm : Grammar True PTerm
--- a/tests/Tests/Lexer.idr
+++ b/tests/Tests/Lexer.idr
@ -46,97 +46,104 @@ tests = "lexer" :- [
    lexes "" [],
    lexes "    " [],
    lexes "-- line comment" [],
-    lexes "name -- line comment" [I "name"],
-    lexes "-- line comment\nnameBetween -- and another" [I "nameBetween"],
+    lexes "name -- line comment" [Name "name"],
+    lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"],
    lexes "{- block comment -}" [],
    lexes "{- {- nested -} block comment -}" []
  ],

  "identifiers & keywords" :- [
-    lexes "abc" [I "abc"],
-    lexes "abc def" [I "abc", R "def"],
-    lexes "abc_def" [I "abc_def"],
-    lexes "abc-def" [I "abc-def"],
-    lexes "abc{-comment-}def" [I "abc", R "def"],
-    lexes "λ" [R "λ"],
-    lexes "fun" [R "λ"],
-    lexes "δ" [R "δ"],
-    lexes "dfun" [R "δ"],
-    lexes "funky" [I "funky"],
-    lexes "δελτα" [I "δελτα"],
-    lexes "★★" [I "★★"],
-    lexes "Types" [I "Types"],
-    lexes "a.b.c.d.e" [I $ MakeName [< "a","b","c","d"] "e"],
-    lexes "normalïse" [I "normalïse"],
-    lexes "map#" [I "map#"],
-    lexes "write!" [I "write!"],
-    lexes "uhh??!?!?!?" [I "uhh??!?!?!?"],
+    lexes "abc" [Name "abc"],
+    lexes "abc def" [Name "abc", Reserved "def"],
+    lexes "abc_def" [Name "abc_def"],
+    lexes "abc-def" [Name "abc-def"],
+    lexes "abc{-comment-}def" [Name "abc", Reserved "def"],
+    lexes "λ" [Reserved "λ"],
+    lexes "fun" [Reserved "λ"],
+    lexes "δ" [Reserved "δ"],
+    lexes "dfun" [Reserved "δ"],
+    lexes "funky" [Name "funky"],
+    lexes "δελτα" [Name "δελτα"],
+    lexes "★★" [Name "★★"],
+    lexes "Types" [Name "Types"],
+    lexes "a.b.c.d.e" [Name $ MakeName [< "a","b","c","d"] "e"],
+    lexes "normalïse" [Name "normalïse"],
      -- ↑ replace i + combining ¨ with precomposed ï
+    lexes "map#" [Name "map#"],
+    lexes "write!" [Name "write!"],
+    lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],

    todo "check for reserved words in a qname",
-    -- lexes "abc.fun.def" [I "abc", R ".", R "λ", R ".", I "def"],
+    {-
+    lexes "abc.fun.def"
+      [Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
+    -}

-    lexes "+" [I "+"],
-    lexes "*" [I "*"],
-    lexes "**" [R "×"],
-    lexes "***" [I "***"],
-    lexes "+**" [I "+**"],
-    lexes "+#" [I "+#"],
-    lexes "+.+.+" [I $ MakeName [< "+", "+"] "+"],
-    lexes "a.+" [I $ MakeName [< "a"] "+"],
-    lexes "+.a" [I $ MakeName [< "+"] "a"],
+    lexes "+" [Name "+"],
+    lexes "*" [Name "*"],
+    lexes "**" [Reserved "×"],
+    lexes "***" [Name "***"],
+    lexes "+**" [Name "+**"],
+    lexes "+#" [Name "+#"],
+    lexes "+.+.+" [Name $ MakeName [< "+", "+"] "+"],
+    lexes "a.+" [Name $ MakeName [< "a"] "+"],
+    lexes "+.a" [Name $ MakeName [< "+"] "a"],

-    lexes "+a" [I "+", I "a"],
+    lexes "+a" [Name "+", Name "a"],

-    lexes "x." [I "x", R "·"],
-    lexes "&." [I "&", R "·"],
-    lexes ".x" [R "·", I "x"],
-    lexes "a.b.c." [I $ MakeName [< "a", "b"] "c", R "·"],
+    lexes "x." [Name "x", Reserved "·"],
+    lexes "&." [Name "&", Reserved "·"],
+    lexes ".x" [Reserved "·", Name "x"],
+    lexes "a.b.c." [Name $ MakeName [< "a", "b"] "c", Reserved "·"],

-    lexes "case" [R "case"],
-    lexes "caseω" [R "caseω"],
-    lexes "case#" [R "caseω"],
-    lexes "case1" [R "case1"],
-    lexes "case0" [I "case0"],
-    lexes "case##" [I "case##"],
+    lexes "case" [Reserved "case"],
+    lexes "caseω" [Reserved "caseω"],
+    lexes "case#" [Reserved "caseω"],
+    lexes "case1" [Reserved "case1"],
+    lexes "case0" [Name "case0"],
+    lexes "case##" [Name "case##"],

-    lexes "_" [R "_"],
-    lexes "_a" [I "_a"],
-    lexes "a_" [I "a_"],
+    lexes "_" [Reserved "_"],
+    lexes "_a" [Name "_a"],
+    lexes "a_" [Name "a_"],

-    lexes "a'" [I "a'"],
-    lexes "+'" [I "+'"]
+    lexes "a'" [Name "a'"],
+    lexes "+'" [Name "+'"]
  ],

  "syntax characters" :- [
-    lexes "()" [R "(", R ")"],
-    lexes "(a)" [R "(", I "a", R ")"],
-    lexes "(^)" [R "(", I "^", R ")"],
-    lexes "{a,b}" [R "{", I "a", R ",", I "b", R "}"],
-    lexes "{+,-}" [R "{", I "+", R ",", I "-", R "}"]
+    lexes "()" [Reserved "(", Reserved ")"],
+    lexes "(a)" [Reserved "(", Name "a", Reserved ")"],
+    lexes "(^)" [Reserved "(", Name "^", Reserved ")"],
+    lexes "{a,b}"
+      [Reserved "{", Name "a", Reserved ",", Name "b", Reserved "}"],
+    lexes "{+,-}"
+      [Reserved "{", Name "+", Reserved ",", Name "-", Reserved "}"]
  ],

  "tags" :- [
-    lexes #" 'a "# [T "a"],
-    lexes #" 'abc "# [T "abc"],
-    lexes #" '+ "# [T "+"],
-    lexes #" '$$$ "# [T "$$$"],
-    lexes #" 'tag.with.dots "# [T "tag.with.dots"],
-    lexes #" '"multi word tag" "# [T "multi word tag"],
-    lexes #" '"" "# [T ""],
+    lexes #" 'a "# [Tag "a"],
+    lexes #" 'abc "# [Tag "abc"],
+    lexes #" '+ "# [Tag "+"],
+    lexes #" '$$$ "# [Tag "$$$"],
+    lexes #" 'tag.with.dots "# [Tag "tag.with.dots"],
+    lexes #" '"multi word tag" "# [Tag "multi word tag"],
+    lexes #" '"" "# [Tag ""],
    lexFail #" ' "#,
    lexFail #" '' "#
  ],

  "strings" :- [
-    lexes #" "" "# [S ""],
-    lexes #" "abc" "# [S "abc"],
-    lexes #" "\"" "# [S "\""],
-    lexes #" "\\" "# [S "\\"],
-    lexes #" "\\\"" "# [S "\\\""],
+    lexes #" "" "# [Str ""],
+    lexes #" "abc" "# [Str "abc"],
+    lexes #" "\"" "# [Str "\""],
+    lexes #" "\\" "# [Str "\\"],
+    lexes #" "\\\"" "# [Str "\\\""],
    todo "other escapes"
  ],

+  todo "naturals",
+
  "universes" :- [
    lexes "Type0" [TYPE 0],
    lexes "Type₀" [TYPE 0],
@ -144,7 +151,7 @@ tests = "lexer" :- [
    lexes "★₀" [TYPE 0],
    lexes "★₆₉" [TYPE 69],
    lexes "★4" [TYPE 4],
-    lexes "Type" [R "★"],
-    lexes "★" [R "★"]
+    lexes "Type" [Reserved "★"],
+    lexes "★" [Reserved "★"]
  ]
 ]