2023-09-24 17:39:29 +02:00
3 changed files with 95 additions and 44 deletions
--- a/lib/Quox/Parser/FromParser/Error.idr
+++ b/lib/Quox/Parser/FromParser/Error.idr
@ -39,12 +39,14 @@ data Error =
 export
 prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts)
 prettyLexError file (Err reason line col char) = do
  let loc = makeLoc file (MkBounds line col line col)
  reason <- case reason of
-    EndInput    => pure "unexpected end of input"
+    Other msg => pure $ text msg
-    NoRuleApply => pure $ text "unrecognised character: \{show char}"
+    NoRuleApply => case char of
      Just char => pure $ text "unrecognised character: \{show char}"
      Nothing   => pure $ text "unexpected end of input"
    ComposeNotClosing (sl, sc) (el, ec) => pure $
      hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))]
  let loc = makeLoc file (MkBounds line col line col)
  pure $ vappend !(prettyLoc loc) reason
 export
--- a/lib/Quox/Parser/Lexer.idr
+++ b/lib/Quox/Parser/Lexer.idr
@ -34,16 +34,27 @@ data Token =
  | Sup Nat
 %runElab derive "Token" [Eq, Ord, Show]
-- token or whitespace
+||| token or whitespace
 ||| @ Skip whitespace, comments, etc
 ||| @ Invalid a token which failed a post-lexer check
 |||           (e.g. a qualified name containing a keyword)
 ||| @ T a well formed token
 public export
-0 TokenW : Type
+data ExtToken = Skip | Invalid String String | T Token
-TokenW = Maybe Token
+%runElab derive "ExtToken" [Eq, Ord, Show]
 public export
 data ErrorReason =
    NoRuleApply
  | ComposeNotClosing (Int, Int) (Int, Int)
  | Other String
 %runElab derive "ErrorReason" [Eq, Ord, Show]
 public export
 record Error where
  constructor Err
-  reason    : StopReason
+  reason    : ErrorReason
  line, col : Int
  ||| `Nothing` if the error is at the end of the input
  char      : Maybe Char
@ -52,19 +63,14 @@ record Error where
 private
-skip : Lexer -> Tokenizer TokenW
+skip : Lexer -> Tokenizer ExtToken
-skip t = match t $ const Nothing
+skip t = match t $ const Skip
 private
-match : Lexer -> (String -> Token) -> Tokenizer TokenW
+tmatch : Lexer -> (String -> Token) -> Tokenizer ExtToken
-match t f = Tokenizer.match t (Just . f)
+tmatch t f = match t (T . f)
 %hide Tokenizer.match
 private
 name : Tokenizer TokenW
 name = match name $ Name . fromListP . split (== '.') . normalizeNfc
 ||| [todo] escapes other than `\"` and (accidentally) `\\`
 export
 fromStringLit : String -> String
@ -76,17 +82,17 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
  go (c :: cs)         = c :: go cs
 private
-string : Tokenizer TokenW
+string : Tokenizer ExtToken
-string = match stringLit (Str . fromStringLit)
+string = tmatch stringLit (Str . fromStringLit)
 private
-nat : Tokenizer TokenW
+nat : Tokenizer ExtToken
-nat = match (some (range '0' '9')) (Nat . cast)
+nat = tmatch (some (range '0' '9')) (Nat . cast)
 private
-tag : Tokenizer TokenW
+tag : Tokenizer ExtToken
-tag = match (is '\'' <+> name)      (Tag . drop 1)
+tag = tmatch (is '\'' <+> name)      (Tag . drop 1)
-  <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
+  <|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
@ -112,17 +118,17 @@ supToNat = cast . pack . map fromSup . unpack
 -- ★0, Type0. base ★/Type is a Reserved
 private
-universe : Tokenizer TokenW
+universe : Tokenizer ExtToken
 universe = universeWith "★" <|> universeWith "Type" where
-  universeWith : String -> Tokenizer TokenW
+  universeWith : String -> Tokenizer ExtToken
  universeWith pfx =
    let len = length pfx in
-    match (exact pfx <+> digits) (TYPE . cast . drop len)
+    tmatch (exact pfx <+> digits) (TYPE . cast . drop len)
 private
-sup : Tokenizer TokenW
+sup : Tokenizer ExtToken
-sup = match (some $ pred isSupDigit) (Sup . supToNat)
+sup = tmatch (some $ pred isSupDigit) (Sup . supToNat)
-  <|> match (is '^' <+> digits)      (Sup . cast . drop 1)
+  <|> tmatch (is '^' <+> digits)      (Sup . cast . drop 1)
 private %inline
@ -165,17 +171,23 @@ resString : Reserved -> String
 resString (Only r)   = resString1 r
 resString (r `Or` _) = resString1 r
 ||| return both representative strings for a token description
 public export
 resString2 : Reserved -> List String
 resString2 (Only r)   = [resString1 r]
 resString2 (r `Or` s) = [resString1 r, resString1 s]
 private
-resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW
+resTokenizer1 : Reserved1 -> String -> Tokenizer ExtToken
 resTokenizer1 r str =
  let res : String -> Token := const $ Reserved str in
-  case r of Word w => match (exact w <+> reject idContEnd) res
+  case r of Word w => tmatch (exact w <+> reject idContEnd) res
-            Sym  s => match (exact s <+> reject symCont)   res
+            Sym  s => tmatch (exact s <+> reject symCont)   res
-            Punc x => match (exact x)                      res
+            Punc x => tmatch (exact x)                      res
 ||| match a reserved token
 export
-resTokenizer : Reserved -> Tokenizer TokenW
+resTokenizer : Reserved -> Tokenizer ExtToken
 resTokenizer (Only r)   = resTokenizer1 r (resString1 r)
 resTokenizer (r `Or` s) =
  resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
@ -219,14 +231,31 @@ reserved =
   Word1 "load",
   Word1 "namespace"]
 public export
 reservedStrings : List String
 reservedStrings = map resString reserved
 public export
 allReservedStrings : List String
 allReservedStrings = foldMap resString2 reserved
 ||| `IsReserved str` is true if `Reserved str` might actually show up in
 ||| the token stream
 public export
 IsReserved : String -> Type
-IsReserved str = str `Elem` map resString reserved
+IsReserved str = So (str `elem` allReservedStrings)
 private
 name : Tokenizer ExtToken
 name =
  match name $ \str =>
    let parts = split (== '.') $ normalizeNfc str in
    case find (`elem` allReservedStrings) (toList parts) of
      Nothing => T $ Name $ fromListP parts
      Just w  => Invalid "reserved word '\{w}' inside name \{str}" str
 export
-tokens : Tokenizer TokenW
+tokens : Tokenizer ExtToken
 tokens = choice $
  map skip [pred isWhitespace,
            lineComment (exact "--" <+> reject symCont),
@ -235,10 +264,24 @@ tokens = choice $
  map resTokenizer reserved <+>
  [sup, nat, string, tag, name]
 export
 check : Alternative f =>
        WithBounds ExtToken -> Either Error (f (WithBounds Token))
 check (MkBounded val irr bounds@(MkBounds line col _ _)) = case val of
  Skip            => Right empty
  T tok           => Right $ pure $ MkBounded tok irr bounds
  Invalid msg tok => Left  $ Err (Other msg) line col (index 0 tok)
 export
 toErrorReason : StopReason -> Maybe ErrorReason
 toErrorReason EndInput                = Nothing
 toErrorReason NoRuleApply             = Just NoRuleApply
 toErrorReason (ComposeNotClosing s e) = Just $ ComposeNotClosing s e
 export
 lex : String -> Either Error (List (WithBounds Token))
 lex str =
  let (res, reason, line, col, str) = lex tokens str in
-  case reason of
+  case toErrorReason reason of
-    EndInput => Right $ mapMaybe sequence res
+    Nothing => concatMap check res @{MonoidApplicative}
-    _        => Left  $ Err {reason, line, col, char = index 0 str}
+    Just e  => Left $ Err {reason = e, line, col, char = index 0 str}
--- a/tests/Tests/Lexer.idr
+++ b/tests/Tests/Lexer.idr
@ -47,7 +47,12 @@ tests = "lexer" :- [
    lexes "    " [],
    lexes "-- line comment" [],
    lexes "name -- line comment" [Name "name"],
-    lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"],
+    lexes
      """
      -- line comment
      nameBetween -- and another
      """
      [Name "nameBetween"],
    lexes "{- block comment -}" [],
    lexes "{- {- nested -} block comment -}" []
  ],
@ -70,13 +75,14 @@ tests = "lexer" :- [
    lexes "normalïse" [Name "normalïse"],
      -- ↑ replace i + combining ¨ with precomposed ï
    lexes "map#" [Name "map#"],
    lexes "map#[" [Name "map#", Reserved "["], -- don't actually do this
    lexes "map #[" [Name "map", Reserved "#["],
    lexes "write!" [Name "write!"],
    lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
-    todo "check for reserved words in a qname",
+    lexFail "abc.fun.ghi",
-    skip $
+    lexFail "abc.λ.ghi",
-    lexes "abc.fun.def"
+    lexFail "abc.ω.ghi",
      [Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
    lexes "+" [Name "+"],
    lexes "*" [Name "*"],