From a51073746215856d6db399923def5f40b41ea518 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Tue, 3 May 2022 02:02:25 +0200 Subject: [PATCH 1/4] .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index afc6d59..202b75d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ build depends *.ipkg *~ +quox +quox-tests From 699c6a5ca18d2ea4e83125e58c836e50de1bb1b0 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Tue, 3 May 2022 02:03:22 +0200 Subject: [PATCH 2/4] token stuff --- src/Quox/Lexer.idr | 83 +++++++++++++++++---------------------- tests/src/Tests/Lexer.idr | 52 +++++++++++++----------- 2 files changed, 65 insertions(+), 70 deletions(-) diff --git a/src/Quox/Lexer.idr b/src/Quox/Lexer.idr index feaaaf4..fda0c70 100644 --- a/src/Quox/Lexer.idr +++ b/src/Quox/Lexer.idr @@ -4,6 +4,7 @@ import Quox.Error import Data.String import public Text.Lexer +import public Text.Lexer.Tokenizer import Generics.Derive %default total @@ -13,6 +14,7 @@ import Generics.Derive public export record Error where constructor Err + reason : StopReason line, col : Int char : Char @@ -32,30 +34,13 @@ data Punc public export -data Kind +data Token = P Punc -| Name | Symbol +| Name String | Symbol String -%runElab derive "Kind" [Generic, Meta, Eq, Ord, DecEq, Show] +%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show] -export -TokenKind Kind where - TokType (P _) = () - TokType Name = String - TokType Symbol = String - - tokValue (P _) _ = () - tokValue Name x = x - tokValue Symbol x = assert_total strTail x - - -Token' = Token (Maybe Kind) -Token = Token Kind - -TokenMap' = TokenMap Token' -TokenMap = TokenMap Token - nameStart = pred $ \c => isAlpha c || c == '_' nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\'' @@ -67,37 +52,43 @@ wild = exact "_" <+> reject nameCont symbol = exact "'" <+> name -tokens = toTokenMap [ - (lineComment (exact "--"), Nothing), - (blockComment (exact "{-") (exact "-}"), Nothing), - (spaces, Nothing), +skip : Lexer -> Tokenizer (Maybe a) +skip lex = match lex $ const Nothing - (exact "(", Just $ P LParen), (exact ")", Just $ P RParen), - (exact "[", Just $ P LSquare), (exact "]", Just $ P RSquare), - (exact "{", Just $ P LBrace), (exact "}", Just $ P RBrace), - (exact ",", Just $ P Comma), - (exact "::" <|> exact "∷", Just $ P DblColon), - (exact ":", Just $ P Colon), +simple : List String -> a -> Tokenizer (Maybe a) +simple str x = match (choice $ map exact str) $ const $ Just x - (exact "->" <|> exact "→", Just $ P Arrow), - (exact "=>" <|> exact "⇒", Just $ P DblArrow), - (exact "**" <|> exact "×", Just $ P Times), - (exact "<<" <|> exact "⊲", Just $ P Triangle), - (wild, Just $ P Wild), +choice : (xs : List (Tokenizer a)) -> {auto 0 _ : So (isCons xs)} -> Tokenizer a +choice (t :: ts) = foldl (\a, b => a <|> b) t ts - (name, Just Name), (symbol, Just Symbol) +tokens : Tokenizer (Maybe Token) +tokens = choice [ + skip $ lineComment $ exact "--", + skip $ blockComment (exact "{-") (exact "-}"), + skip spaces, + + simple ["("] $ P LParen, simple [")"] $ P RParen, + simple ["["] $ P LSquare, simple ["]"] $ P RSquare, + simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, + simple [","] $ P Comma, + simple ["::", "∷"] $ P DblColon, + simple [":"] $ P Colon, + + simple ["->", "→"] $ P Arrow, + simple ["=>", "⇒"] $ P DblArrow, + simple ["**", "×"] $ P Times, + simple ["<<", "⊲"] $ P Triangle, + match wild $ const $ Just $ P Wild, + + match name $ Just . Name, + match symbol $ Just . Symbol . assert_total strTail ] -sequenceT : Token (Maybe Kind) -> Maybe (Token Kind) -sequenceT tok = - case tok.kind of - Just k => Just $ {kind := k} tok - Nothing => Nothing - export lex : MonadThrow Error m => String -> m (List (WithBounds Token)) lex str = - let (res, (line, col, str)) = lex tokens str in - case asList str of - [] => pure $ mapMaybe (traverse sequenceT) res - c :: _ => throw $ Err {line, col, char = c} + let (res, (reason, line, col, str)) = lex tokens str in + case reason of + EndInput => pure $ mapMaybe sequence res + _ => let char = assert_total strIndex str 0 in + throw $ Err {reason, line, col, char} diff --git a/tests/src/Tests/Lexer.idr b/tests/src/Tests/Lexer.idr index a9cf3f5..ceb67c6 100644 --- a/tests/src/Tests/Lexer.idr +++ b/tests/src/Tests/Lexer.idr @@ -6,32 +6,36 @@ import TAP export ToInfo Error where - toInfo (Err line col char) = - [("line", show line), ("col", show col), ("char", show char)] + toInfo (Err reason line col char) = + [("reason", show reason), + ("line", show line), + ("col", show col), + ("char", show char)] data ExtraError -= WrongAnswer (List Kind) (List Kind) -| TestFailed (List Kind) += WrongAnswer (List Token) (List Token) +| TestFailed (List Token) ToInfo ExtraError where toInfo (WrongAnswer exp got) = [("expected", show exp), ("received", show got)] - toInfo (TestFailed got) = [("failed", show got)] + toInfo (TestFailed got) = + [("failed", show got)] parameters (label : String) (input : String) - acceptsSuchThat' : (List Kind -> Maybe ExtraError) -> Test + acceptsSuchThat' : (List Token -> Maybe ExtraError) -> Test acceptsSuchThat' p = test {es = [Lexer.Error, ExtraError]} label $ do - res <- map (kind . val) <$> lex input + res <- map val <$> lex input case p res of Just err => throw err Nothing => pure () - acceptsSuchThat : (List Kind -> Bool) -> Test + acceptsSuchThat : (List Token -> Bool) -> Test acceptsSuchThat p = acceptsSuchThat' $ \res => if p res then Nothing else Just $ TestFailed res - acceptsWith : List Kind -> Test + acceptsWith : List Token -> Test acceptsWith expect = acceptsSuchThat' $ \res => if res == expect then Nothing else Just $ WrongAnswer expect res @@ -40,13 +44,13 @@ parameters (label : String) (input : String) rejects : Test rejects = testThrows {es = [Lexer.Error]} label $ delay $ - map (kind . val) <$> lex input + map val <$> lex input parameters (input : String) {default False esc : Bool} show' : String -> String show' s = if esc then show s else "\"\{s}\"" - acceptsWith' : List Kind -> Test + acceptsWith' : List Token -> Test acceptsWith' = acceptsWith (show' input) input accepts' : Test @@ -87,19 +91,19 @@ tests = "lexer" :- [ ], "names & symbols" :- [ - acceptsWith' "a" [Name], - acceptsWith' "abc" [Name], - acceptsWith' "_a" [Name], - acceptsWith' "a_" [Name], - acceptsWith' "a_b" [Name], - acceptsWith' "abc'" [Name], - acceptsWith' "a'b'c''" [Name], - acceptsWith' "abc123" [Name], - acceptsWith' "ab cd" [Name, Name], - acceptsWith' "ab{--}cd" [Name, Name], - acceptsWith' "'a" [Symbol], - acceptsWith' "'ab" [Symbol], - acceptsWith' "'_b" [Symbol], + acceptsWith' "a" [Name "a"], + acceptsWith' "abc" [Name "abc"], + acceptsWith' "_a" [Name "_a"], + acceptsWith' "a_" [Name "a_"], + acceptsWith' "a_b" [Name "a_b"], + acceptsWith' "abc'" [Name "abc'"], + acceptsWith' "a'b'c''" [Name "a'b'c''"], + acceptsWith' "abc123" [Name "abc123"], + acceptsWith' "ab cd" [Name "ab", Name "cd"], + acceptsWith' "ab{--}cd" [Name "ab", Name "cd"], + acceptsWith' "'a" [Symbol "a"], + acceptsWith' "'ab" [Symbol "ab"], + acceptsWith' "'_b" [Symbol "_b"], rejects' "'" ] ] From fa5beb4e2b9bafd724555c548ed5d83b79d3899b Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Wed, 4 May 2022 00:49:09 +0200 Subject: [PATCH 3/4] decimal numbers --- src/Quox/Lexer.idr | 37 ++++++++++++++++++++++++++++++++++++- tests/src/Tests/Lexer.idr | 19 +++++++++++++++---- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/Quox/Lexer.idr b/src/Quox/Lexer.idr index fda0c70..f64617c 100644 --- a/src/Quox/Lexer.idr +++ b/src/Quox/Lexer.idr @@ -32,11 +32,19 @@ data Punc %runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show] +||| zero and one are separate because they are +||| quantity & dimension constants +public export +data Number = Zero | One | Other Nat + +%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show] + public export data Token = P Punc | Name String | Symbol String +| N Number %runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show] @@ -52,6 +60,30 @@ wild = exact "_" <+> reject nameCont symbol = exact "'" <+> name +decimal : Lexer +decimal = + digit <+> opt (many (digit <|> is '_') <+> digit) + +natToNumber : Nat -> Number +natToNumber 0 = Zero +natToNumber 1 = One +natToNumber k = Other k + +toDigit : Char -> Nat +toDigit c = cast $ ord c - ord '0' + +makeDec' : Nat -> String -> Maybe Nat +makeDec' acc x with (asList x) + makeDec' acc "" | [] = pure acc + makeDec' acc (strCons '_' str) | '_' :: lst = makeDec' acc str | lst + makeDec' acc (strCons d str) | d :: lst = + if d >= '0' && d <= '9' then + makeDec' (acc * 10 + toDigit d) str | lst + else + Nothing + +makeDec = fromMaybe 0 . makeDec' 0 + skip : Lexer -> Tokenizer (Maybe a) skip lex = match lex $ const Nothing @@ -81,7 +113,10 @@ tokens = choice [ match wild $ const $ Just $ P Wild, match name $ Just . Name, - match symbol $ Just . Symbol . assert_total strTail + match symbol $ Just . Symbol . assert_total strTail, + + -- [todo] other bases? + match (some $ digit <|> exact "_") $ Just . N . natToNumber . makeDec ] export diff --git a/tests/src/Tests/Lexer.idr b/tests/src/Tests/Lexer.idr index ceb67c6..4682612 100644 --- a/tests/src/Tests/Lexer.idr +++ b/tests/src/Tests/Lexer.idr @@ -77,12 +77,12 @@ tests = "lexer" :- [ "punctuation" :- [ acceptsWith' "({[:,::]})" - [P LParen, P LBrace, P LSquare, - P Colon, P Comma, P DblColon, + [P LParen, P LBrace, P LSquare, + P Colon, P Comma, P DblColon, P RSquare, P RBrace, P RParen], acceptsWith' " ( { [ : , :: ] } ) " - [P LParen, P LBrace, P LSquare, - P Colon, P Comma, P DblColon, + [P LParen, P LBrace, P LSquare, + P Colon, P Comma, P DblColon, P RSquare, P RBrace, P RParen], acceptsWith' "-> → => ⇒ ** × << ⊲ ∷" [P Arrow, P Arrow, P DblArrow, P DblArrow, @@ -99,11 +99,22 @@ tests = "lexer" :- [ acceptsWith' "abc'" [Name "abc'"], acceptsWith' "a'b'c''" [Name "a'b'c''"], acceptsWith' "abc123" [Name "abc123"], + acceptsWith' "_1" [Name "_1"], acceptsWith' "ab cd" [Name "ab", Name "cd"], acceptsWith' "ab{--}cd" [Name "ab", Name "cd"], acceptsWith' "'a" [Symbol "a"], acceptsWith' "'ab" [Symbol "ab"], acceptsWith' "'_b" [Symbol "_b"], rejects' "'" + ], + + "numbers" :- [ + acceptsWith' "0" [N Zero], + acceptsWith' "1" [N One], + acceptsWith' "2" [N $ Other 2], + acceptsWith' "69" [N $ Other 69], + acceptsWith' "1_000" [N $ Other 1000], + todo "octal", + todo "hex" ] ] From e13cd50175f9e59565a88d2991b91d1523a1bdab Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Wed, 4 May 2022 00:49:18 +0200 Subject: [PATCH 4/4] pokes in TAP --- tests/src/TAP.idr | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/src/TAP.idr b/tests/src/TAP.idr index f3a5b7e..3f8d63d 100644 --- a/tests/src/TAP.idr +++ b/tests/src/TAP.idr @@ -1,4 +1,5 @@ module TAP +-- [todo] extract this and Quox.Error to their own packages import public Quox.Error @@ -45,7 +46,7 @@ All ToInfo es => ToInfo (OneOf es) where export %inline ToInfo () where toInfo () = [] -export Show a => ToInfo (List (String, a)) where toInfo = map (map show) +export %inline Show a => ToInfo (List (String, a)) where toInfo = map (map show) export @@ -121,15 +122,17 @@ export %inline header : List a -> String header tests = "1..\{show $ length tests}" +private makePrefix : SnocList String -> String makePrefix [<] = "" makePrefix (xs :< x) = foldr (\a, b => "\{a}/\{b}") x xs +private %inline withPrefix : SnocList String -> TestBase -> Test withPrefix pfx b = One $ {label := "[\{makePrefix pfx}] \{b.label}"} b mutual - export + export %inline flattenWith : SnocList String -> List Test -> List Test flattenWith pfx = concatMap (flatten1With pfx) @@ -138,11 +141,11 @@ mutual flatten1With pfx (One t) = [withPrefix pfx t] flatten1With pfx (Group x ts) = flattenWith (pfx :< x) ts -export +export %inline flatten : List Test -> List Test flatten = flattenWith [<] -export +export %inline flatten1 : Test -> List Test flatten1 = flatten1With [<]