From 699c6a5ca18d2ea4e83125e58c836e50de1bb1b0 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Tue, 3 May 2022 02:03:22 +0200 Subject: [PATCH] token stuff --- src/Quox/Lexer.idr | 83 +++++++++++++++++---------------------- tests/src/Tests/Lexer.idr | 52 +++++++++++++----------- 2 files changed, 65 insertions(+), 70 deletions(-) diff --git a/src/Quox/Lexer.idr b/src/Quox/Lexer.idr index feaaaf4..fda0c70 100644 --- a/src/Quox/Lexer.idr +++ b/src/Quox/Lexer.idr @@ -4,6 +4,7 @@ import Quox.Error import Data.String import public Text.Lexer +import public Text.Lexer.Tokenizer import Generics.Derive %default total @@ -13,6 +14,7 @@ import Generics.Derive public export record Error where constructor Err + reason : StopReason line, col : Int char : Char @@ -32,30 +34,13 @@ data Punc public export -data Kind +data Token = P Punc -| Name | Symbol +| Name String | Symbol String -%runElab derive "Kind" [Generic, Meta, Eq, Ord, DecEq, Show] +%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show] -export -TokenKind Kind where - TokType (P _) = () - TokType Name = String - TokType Symbol = String - - tokValue (P _) _ = () - tokValue Name x = x - tokValue Symbol x = assert_total strTail x - - -Token' = Token (Maybe Kind) -Token = Token Kind - -TokenMap' = TokenMap Token' -TokenMap = TokenMap Token - nameStart = pred $ \c => isAlpha c || c == '_' nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\'' @@ -67,37 +52,43 @@ wild = exact "_" <+> reject nameCont symbol = exact "'" <+> name -tokens = toTokenMap [ - (lineComment (exact "--"), Nothing), - (blockComment (exact "{-") (exact "-}"), Nothing), - (spaces, Nothing), +skip : Lexer -> Tokenizer (Maybe a) +skip lex = match lex $ const Nothing - (exact "(", Just $ P LParen), (exact ")", Just $ P RParen), - (exact "[", Just $ P LSquare), (exact "]", Just $ P RSquare), - (exact "{", Just $ P LBrace), (exact "}", Just $ P RBrace), - (exact ",", Just $ P Comma), - (exact "::" <|> exact "∷", Just $ P DblColon), - (exact ":", Just $ P Colon), +simple : List String -> a -> Tokenizer (Maybe a) +simple str x = match (choice $ map exact str) $ const $ Just x - (exact "->" <|> exact "→", Just $ P Arrow), - (exact "=>" <|> exact "⇒", Just $ P DblArrow), - (exact "**" <|> exact "×", Just $ P Times), - (exact "<<" <|> exact "⊲", Just $ P Triangle), - (wild, Just $ P Wild), +choice : (xs : List (Tokenizer a)) -> {auto 0 _ : So (isCons xs)} -> Tokenizer a +choice (t :: ts) = foldl (\a, b => a <|> b) t ts - (name, Just Name), (symbol, Just Symbol) +tokens : Tokenizer (Maybe Token) +tokens = choice [ + skip $ lineComment $ exact "--", + skip $ blockComment (exact "{-") (exact "-}"), + skip spaces, + + simple ["("] $ P LParen, simple [")"] $ P RParen, + simple ["["] $ P LSquare, simple ["]"] $ P RSquare, + simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, + simple [","] $ P Comma, + simple ["::", "∷"] $ P DblColon, + simple [":"] $ P Colon, + + simple ["->", "→"] $ P Arrow, + simple ["=>", "⇒"] $ P DblArrow, + simple ["**", "×"] $ P Times, + simple ["<<", "⊲"] $ P Triangle, + match wild $ const $ Just $ P Wild, + + match name $ Just . Name, + match symbol $ Just . Symbol . assert_total strTail ] -sequenceT : Token (Maybe Kind) -> Maybe (Token Kind) -sequenceT tok = - case tok.kind of - Just k => Just $ {kind := k} tok - Nothing => Nothing - export lex : MonadThrow Error m => String -> m (List (WithBounds Token)) lex str = - let (res, (line, col, str)) = lex tokens str in - case asList str of - [] => pure $ mapMaybe (traverse sequenceT) res - c :: _ => throw $ Err {line, col, char = c} + let (res, (reason, line, col, str)) = lex tokens str in + case reason of + EndInput => pure $ mapMaybe sequence res + _ => let char = assert_total strIndex str 0 in + throw $ Err {reason, line, col, char} diff --git a/tests/src/Tests/Lexer.idr b/tests/src/Tests/Lexer.idr index a9cf3f5..ceb67c6 100644 --- a/tests/src/Tests/Lexer.idr +++ b/tests/src/Tests/Lexer.idr @@ -6,32 +6,36 @@ import TAP export ToInfo Error where - toInfo (Err line col char) = - [("line", show line), ("col", show col), ("char", show char)] + toInfo (Err reason line col char) = + [("reason", show reason), + ("line", show line), + ("col", show col), + ("char", show char)] data ExtraError -= WrongAnswer (List Kind) (List Kind) -| TestFailed (List Kind) += WrongAnswer (List Token) (List Token) +| TestFailed (List Token) ToInfo ExtraError where toInfo (WrongAnswer exp got) = [("expected", show exp), ("received", show got)] - toInfo (TestFailed got) = [("failed", show got)] + toInfo (TestFailed got) = + [("failed", show got)] parameters (label : String) (input : String) - acceptsSuchThat' : (List Kind -> Maybe ExtraError) -> Test + acceptsSuchThat' : (List Token -> Maybe ExtraError) -> Test acceptsSuchThat' p = test {es = [Lexer.Error, ExtraError]} label $ do - res <- map (kind . val) <$> lex input + res <- map val <$> lex input case p res of Just err => throw err Nothing => pure () - acceptsSuchThat : (List Kind -> Bool) -> Test + acceptsSuchThat : (List Token -> Bool) -> Test acceptsSuchThat p = acceptsSuchThat' $ \res => if p res then Nothing else Just $ TestFailed res - acceptsWith : List Kind -> Test + acceptsWith : List Token -> Test acceptsWith expect = acceptsSuchThat' $ \res => if res == expect then Nothing else Just $ WrongAnswer expect res @@ -40,13 +44,13 @@ parameters (label : String) (input : String) rejects : Test rejects = testThrows {es = [Lexer.Error]} label $ delay $ - map (kind . val) <$> lex input + map val <$> lex input parameters (input : String) {default False esc : Bool} show' : String -> String show' s = if esc then show s else "\"\{s}\"" - acceptsWith' : List Kind -> Test + acceptsWith' : List Token -> Test acceptsWith' = acceptsWith (show' input) input accepts' : Test @@ -87,19 +91,19 @@ tests = "lexer" :- [ ], "names & symbols" :- [ - acceptsWith' "a" [Name], - acceptsWith' "abc" [Name], - acceptsWith' "_a" [Name], - acceptsWith' "a_" [Name], - acceptsWith' "a_b" [Name], - acceptsWith' "abc'" [Name], - acceptsWith' "a'b'c''" [Name], - acceptsWith' "abc123" [Name], - acceptsWith' "ab cd" [Name, Name], - acceptsWith' "ab{--}cd" [Name, Name], - acceptsWith' "'a" [Symbol], - acceptsWith' "'ab" [Symbol], - acceptsWith' "'_b" [Symbol], + acceptsWith' "a" [Name "a"], + acceptsWith' "abc" [Name "abc"], + acceptsWith' "_a" [Name "_a"], + acceptsWith' "a_" [Name "a_"], + acceptsWith' "a_b" [Name "a_b"], + acceptsWith' "abc'" [Name "abc'"], + acceptsWith' "a'b'c''" [Name "a'b'c''"], + acceptsWith' "abc123" [Name "abc123"], + acceptsWith' "ab cd" [Name "ab", Name "cd"], + acceptsWith' "ab{--}cd" [Name "ab", Name "cd"], + acceptsWith' "'a" [Symbol "a"], + acceptsWith' "'ab" [Symbol "ab"], + acceptsWith' "'_b" [Symbol "_b"], rejects' "'" ] ]