From 809a0fc859c9d24765938ebbd9a74ba679e0a25f Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Sat, 7 May 2022 21:26:16 +0200 Subject: [PATCH] simplify lexical grammar this is a core language after all --- lib/Quox/Lexer.idr | 72 ++++++++++++++----------------------------- tests/Tests/Lexer.idr | 34 +++++--------------- 2 files changed, 31 insertions(+), 75 deletions(-) diff --git a/lib/Quox/Lexer.idr b/lib/Quox/Lexer.idr index d2c4271..511c2e7 100644 --- a/lib/Quox/Lexer.idr +++ b/lib/Quox/Lexer.idr @@ -27,17 +27,12 @@ nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\'' name = nameStart <+> many nameCont <+> reject nameCont -wild = exact "_" <+> reject nameCont +wild = is '_' <+> reject nameCont %hide Text.Lexer.symbol -symbol = exact "'" <+> name +symbol = is '\'' <+> name -number : Lexer -> Lexer -number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont - -octal = approx "0o" <+> number octDigit -decimal = number digit -hexadecimal = approx "0x" <+> number hexDigit +decimal = some digit <+> reject nameCont natToNumber : Nat -> Number @@ -46,32 +41,11 @@ natToNumber 1 = One natToNumber k = Other k -toHexit : Char -> Nat -toHexit c = cast $ - if '0' <= c && c <= '9' then - ord c - ord '0' - else if 'a' <= c && c <= 'f' then - ord c - ord 'a' + 10 - else if 'A' <= c && c <= 'F' then - ord c - ord 'A' + 10 - else 0 - -parameters (base : Nat) (single : Char -> Nat) - makeNat : Nat -> List Char -> Nat - makeNat acc [] = acc - makeNat acc ('_' :: lst) = makeNat acc lst - makeNat acc (d :: lst) = makeNat (acc * base + single d) lst - -makeOct = makeNat 8 toHexit 0 . unpack -makeDec = makeNat 10 toHexit 0 . unpack -makeHex = makeNat 16 toHexit 0 . unpack - - skip : Lexer -> Tokenizer (Maybe a) skip lex = match lex $ const Nothing -simple : List String -> a -> Tokenizer (Maybe a) -simple strs = match (choice $ map exact strs) . const . Just +simple : Char -> a -> Tokenizer (Maybe a) +simple ch = match (is ch) . const . Just keyword : String -> Keyword -> Tokenizer (Maybe Token) keyword str = match (exact str <+> reject nameCont) . const . Just . K @@ -90,31 +64,31 @@ tokens = choice [ skip $ blockComment (exact "{-") (exact "-}"), skip spaces, - simple ["("] $ P LParen, simple [")"] $ P RParen, - simple ["["] $ P LSquare, simple ["]"] $ P RSquare, - simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, - simple [","] $ P Comma, - simple ["::", "∷"] $ P DblColon, - simple [":"] $ P Colon, -- needs to be after "::" - simple ["."] $ P Dot, + simple '(' $ P LParen, simple ')' $ P RParen, + simple '[' $ P LSquare, simple ']' $ P RSquare, + simple '{' $ P LBrace, simple '}' $ P RBrace, + simple ',' $ P Comma, + simple '∷' $ P DblColon, + simple ':' $ P Colon, -- needs to be after '::' + simple '.' $ P Dot, - simple ["->", "→"] $ P Arrow, - simple ["=>", "⇒"] $ P DblArrow, - simple ["**", "×"] $ P Times, - simple ["<<", "⊲"] $ P Triangle, + simple '→' $ P Arrow, + simple '⇒' $ P DblArrow, + simple '×' $ P Times, + simple '⊲' $ P Triangle, match wild $ const $ P Wild, - keyword "fun" Fun, keyword "λ" Fun, - keyword "let" Let, keyword "in" In, - keyword "case" Case, keyword "of" Of, - keyword "ω" Omega, simple ["#"] $ K Omega, + keyword "λ" Lam, + keyword "let" Let, keyword "in" In, + keyword "case" Case, keyword "of" Of, + keyword "ω" Omega, + keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W, + simple '★' $ K TYPE, match name $ Name, match symbol $ Symbol . assert_total strTail, - match decimal $ N . natToNumber . makeDec, - match hexadecimal $ N . natToNumber . makeHex . drop 2, - match octal $ N . natToNumber . makeOct . drop 2 + match decimal $ N . natToNumber . cast ] diff --git a/tests/Tests/Lexer.idr b/tests/Tests/Lexer.idr index c9b0ca3..c3f0fe7 100644 --- a/tests/Tests/Lexer.idr +++ b/tests/Tests/Lexer.idr @@ -84,17 +84,16 @@ tests = "lexer" :- [ ], "punctuation" :- [ - acceptsWith' "({[:,::]})" + acceptsWith' "({[:,]})" [P LParen, P LBrace, P LSquare, - P Colon, P Comma, P DblColon, + P Colon, P Comma, P RSquare, P RBrace, P RParen], - acceptsWith' " ( { [ : , :: ] } ) " + acceptsWith' " ( { [ : , ] } ) " [P LParen, P LBrace, P LSquare, - P Colon, P Comma, P DblColon, + P Colon, P Comma, P RSquare, P RBrace, P RParen], - acceptsWith' "-> → => ⇒ ** × << ⊲ ∷" - [P Arrow, P Arrow, P DblArrow, P DblArrow, - P Times, P Times, P Triangle, P Triangle, P DblColon], + acceptsWith' "→ ⇒ × ⊲ ∷" + [P Arrow, P DblArrow, P Times, P Triangle, P DblColon], acceptsWith' "_" [P Wild] ], @@ -119,14 +118,12 @@ tests = "lexer" :- [ ], "keywords" :- [ - acceptsWith' "fun" [K Fun], - acceptsWith' "λ" [K Fun], + acceptsWith' "λ" [K Lam], acceptsWith' "let" [K Let], acceptsWith' "in" [K In], acceptsWith' "case" [K Case], acceptsWith' "of" [K Of], acceptsWith' "ω" [K Omega], - acceptsWith' "#" [K Omega], acceptsWith' "funk" [Name "funk"] ], @@ -134,21 +131,6 @@ tests = "lexer" :- [ acceptsWith' "0" [N Zero], acceptsWith' "1" [N One], acceptsWith' "2" [N $ Other 2], - acceptsWith' "69" [N $ Other 69], - acceptsWith' "1_000" [N $ Other 1000], - acceptsWith' "0o0" [N Zero], - acceptsWith' "0o105" [N $ Other 69], - acceptsWith' "0O0" [N Zero], - acceptsWith' "0O105" [N $ Other 69], - acceptsWith' "0x0" [N Zero], - acceptsWith' "0x45" [N $ Other 69], - acceptsWith' "0xabc" [N $ Other 2748], - acceptsWith' "0xABC" [N $ Other 2748], - acceptsWith' "0xA_BC" [N $ Other 2748], - acceptsWith' "0X0" [N Zero], - acceptsWith' "0X45" [N $ Other 69], - acceptsWith' "0XABC" [N $ Other 2748], - rejects' "1_", - rejects' "1__000" + acceptsWith' "69" [N $ Other 69] ] ]