diff --git a/lib/Quox/Parser/FromParser/Error.idr b/lib/Quox/Parser/FromParser/Error.idr index d561df7..301ff59 100644 --- a/lib/Quox/Parser/FromParser/Error.idr +++ b/lib/Quox/Parser/FromParser/Error.idr @@ -39,12 +39,14 @@ data Error = export prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts) prettyLexError file (Err reason line col char) = do - let loc = makeLoc file (MkBounds line col line col) reason <- case reason of - EndInput => pure "unexpected end of input" - NoRuleApply => pure $ text "unrecognised character: \{show char}" + Other msg => pure $ text msg + NoRuleApply => case char of + Just char => pure $ text "unrecognised character: \{show char}" + Nothing => pure $ text "unexpected end of input" ComposeNotClosing (sl, sc) (el, ec) => pure $ hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))] + let loc = makeLoc file (MkBounds line col line col) pure $ vappend !(prettyLoc loc) reason export diff --git a/lib/Quox/Parser/Lexer.idr b/lib/Quox/Parser/Lexer.idr index 57b7e96..eb71ad9 100644 --- a/lib/Quox/Parser/Lexer.idr +++ b/lib/Quox/Parser/Lexer.idr @@ -34,16 +34,27 @@ data Token = | Sup Nat %runElab derive "Token" [Eq, Ord, Show] --- token or whitespace +||| token or whitespace +||| @ Skip whitespace, comments, etc +||| @ Invalid a token which failed a post-lexer check +||| (e.g. a qualified name containing a keyword) +||| @ T a well formed token public export -0 TokenW : Type -TokenW = Maybe Token +data ExtToken = Skip | Invalid String String | T Token +%runElab derive "ExtToken" [Eq, Ord, Show] +public export +data ErrorReason = + NoRuleApply + | ComposeNotClosing (Int, Int) (Int, Int) + | Other String +%runElab derive "ErrorReason" [Eq, Ord, Show] + public export record Error where constructor Err - reason : StopReason + reason : ErrorReason line, col : Int ||| `Nothing` if the error is at the end of the input char : Maybe Char @@ -52,19 +63,14 @@ record Error where private -skip : Lexer -> Tokenizer TokenW -skip t = match t $ const Nothing +skip : Lexer -> Tokenizer ExtToken +skip t = match t $ const Skip private -match : Lexer -> (String -> Token) -> Tokenizer TokenW -match t f = Tokenizer.match t (Just . f) -%hide Tokenizer.match +tmatch : Lexer -> (String -> Token) -> Tokenizer ExtToken +tmatch t f = match t (T . f) -private -name : Tokenizer TokenW -name = match name $ Name . fromListP . split (== '.') . normalizeNfc - ||| [todo] escapes other than `\"` and (accidentally) `\\` export fromStringLit : String -> String @@ -76,17 +82,17 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where go (c :: cs) = c :: go cs private -string : Tokenizer TokenW -string = match stringLit (Str . fromStringLit) +string : Tokenizer ExtToken +string = tmatch stringLit (Str . fromStringLit) private -nat : Tokenizer TokenW -nat = match (some (range '0' '9')) (Nat . cast) +nat : Tokenizer ExtToken +nat = tmatch (some (range '0' '9')) (Nat . cast) private -tag : Tokenizer TokenW -tag = match (is '\'' <+> name) (Tag . drop 1) - <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1) +tag : Tokenizer ExtToken +tag = tmatch (is '\'' <+> name) (Tag . drop 1) + <|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1) @@ -112,17 +118,17 @@ supToNat = cast . pack . map fromSup . unpack -- ★0, Type0. base ★/Type is a Reserved private -universe : Tokenizer TokenW +universe : Tokenizer ExtToken universe = universeWith "★" <|> universeWith "Type" where - universeWith : String -> Tokenizer TokenW + universeWith : String -> Tokenizer ExtToken universeWith pfx = let len = length pfx in - match (exact pfx <+> digits) (TYPE . cast . drop len) + tmatch (exact pfx <+> digits) (TYPE . cast . drop len) private -sup : Tokenizer TokenW -sup = match (some $ pred isSupDigit) (Sup . supToNat) - <|> match (is '^' <+> digits) (Sup . cast . drop 1) +sup : Tokenizer ExtToken +sup = tmatch (some $ pred isSupDigit) (Sup . supToNat) + <|> tmatch (is '^' <+> digits) (Sup . cast . drop 1) private %inline @@ -165,17 +171,23 @@ resString : Reserved -> String resString (Only r) = resString1 r resString (r `Or` _) = resString1 r +||| return both representative strings for a token description +public export +resString2 : Reserved -> List String +resString2 (Only r) = [resString1 r] +resString2 (r `Or` s) = [resString1 r, resString1 s] + private -resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW +resTokenizer1 : Reserved1 -> String -> Tokenizer ExtToken resTokenizer1 r str = let res : String -> Token := const $ Reserved str in - case r of Word w => match (exact w <+> reject idContEnd) res - Sym s => match (exact s <+> reject symCont) res - Punc x => match (exact x) res + case r of Word w => tmatch (exact w <+> reject idContEnd) res + Sym s => tmatch (exact s <+> reject symCont) res + Punc x => tmatch (exact x) res ||| match a reserved token export -resTokenizer : Reserved -> Tokenizer TokenW +resTokenizer : Reserved -> Tokenizer ExtToken resTokenizer (Only r) = resTokenizer1 r (resString1 r) resTokenizer (r `Or` s) = resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r) @@ -219,14 +231,31 @@ reserved = Word1 "load", Word1 "namespace"] +public export +reservedStrings : List String +reservedStrings = map resString reserved + +public export +allReservedStrings : List String +allReservedStrings = foldMap resString2 reserved + ||| `IsReserved str` is true if `Reserved str` might actually show up in ||| the token stream public export IsReserved : String -> Type -IsReserved str = str `Elem` map resString reserved +IsReserved str = So (str `elem` allReservedStrings) + +private +name : Tokenizer ExtToken +name = + match name $ \str => + let parts = split (== '.') $ normalizeNfc str in + case find (`elem` allReservedStrings) (toList parts) of + Nothing => T $ Name $ fromListP parts + Just w => Invalid "reserved word '\{w}' inside name \{str}" str export -tokens : Tokenizer TokenW +tokens : Tokenizer ExtToken tokens = choice $ map skip [pred isWhitespace, lineComment (exact "--" <+> reject symCont), @@ -235,10 +264,24 @@ tokens = choice $ map resTokenizer reserved <+> [sup, nat, string, tag, name] +export +check : Alternative f => + WithBounds ExtToken -> Either Error (f (WithBounds Token)) +check (MkBounded val irr bounds@(MkBounds line col _ _)) = case val of + Skip => Right empty + T tok => Right $ pure $ MkBounded tok irr bounds + Invalid msg tok => Left $ Err (Other msg) line col (index 0 tok) + +export +toErrorReason : StopReason -> Maybe ErrorReason +toErrorReason EndInput = Nothing +toErrorReason NoRuleApply = Just NoRuleApply +toErrorReason (ComposeNotClosing s e) = Just $ ComposeNotClosing s e + export lex : String -> Either Error (List (WithBounds Token)) lex str = let (res, reason, line, col, str) = lex tokens str in - case reason of - EndInput => Right $ mapMaybe sequence res - _ => Left $ Err {reason, line, col, char = index 0 str} + case toErrorReason reason of + Nothing => concatMap check res @{MonoidApplicative} + Just e => Left $ Err {reason = e, line, col, char = index 0 str} diff --git a/tests/Tests/Lexer.idr b/tests/Tests/Lexer.idr index 7823d5d..910a434 100644 --- a/tests/Tests/Lexer.idr +++ b/tests/Tests/Lexer.idr @@ -47,7 +47,12 @@ tests = "lexer" :- [ lexes " " [], lexes "-- line comment" [], lexes "name -- line comment" [Name "name"], - lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"], + lexes + """ + -- line comment + nameBetween -- and another + """ + [Name "nameBetween"], lexes "{- block comment -}" [], lexes "{- {- nested -} block comment -}" [] ], @@ -70,13 +75,14 @@ tests = "lexer" :- [ lexes "normalïse" [Name "normalïse"], -- ↑ replace i + combining ¨ with precomposed ï lexes "map#" [Name "map#"], + lexes "map#[" [Name "map#", Reserved "["], -- don't actually do this + lexes "map #[" [Name "map", Reserved "#["], lexes "write!" [Name "write!"], lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"], - todo "check for reserved words in a qname", - skip $ - lexes "abc.fun.def" - [Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"], + lexFail "abc.fun.ghi", + lexFail "abc.λ.ghi", + lexFail "abc.ω.ghi", lexes "+" [Name "+"], lexes "*" [Name "*"],