simplify lexical grammar

this is a core language after all
This commit is contained in:
rhiannon morris 2022-05-07 21:26:16 +02:00
parent 1c8b2b205b
commit 809a0fc859
2 changed files with 31 additions and 75 deletions

View file

@ -27,17 +27,12 @@ nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont name = nameStart <+> many nameCont <+> reject nameCont
wild = exact "_" <+> reject nameCont wild = is '_' <+> reject nameCont
%hide Text.Lexer.symbol %hide Text.Lexer.symbol
symbol = exact "'" <+> name symbol = is '\'' <+> name
number : Lexer -> Lexer decimal = some digit <+> reject nameCont
number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont
octal = approx "0o" <+> number octDigit
decimal = number digit
hexadecimal = approx "0x" <+> number hexDigit
natToNumber : Nat -> Number natToNumber : Nat -> Number
@ -46,32 +41,11 @@ natToNumber 1 = One
natToNumber k = Other k natToNumber k = Other k
toHexit : Char -> Nat
toHexit c = cast $
if '0' <= c && c <= '9' then
ord c - ord '0'
else if 'a' <= c && c <= 'f' then
ord c - ord 'a' + 10
else if 'A' <= c && c <= 'F' then
ord c - ord 'A' + 10
else 0
parameters (base : Nat) (single : Char -> Nat)
makeNat : Nat -> List Char -> Nat
makeNat acc [] = acc
makeNat acc ('_' :: lst) = makeNat acc lst
makeNat acc (d :: lst) = makeNat (acc * base + single d) lst
makeOct = makeNat 8 toHexit 0 . unpack
makeDec = makeNat 10 toHexit 0 . unpack
makeHex = makeNat 16 toHexit 0 . unpack
skip : Lexer -> Tokenizer (Maybe a) skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing skip lex = match lex $ const Nothing
simple : List String -> a -> Tokenizer (Maybe a) simple : Char -> a -> Tokenizer (Maybe a)
simple strs = match (choice $ map exact strs) . const . Just simple ch = match (is ch) . const . Just
keyword : String -> Keyword -> Tokenizer (Maybe Token) keyword : String -> Keyword -> Tokenizer (Maybe Token)
keyword str = match (exact str <+> reject nameCont) . const . Just . K keyword str = match (exact str <+> reject nameCont) . const . Just . K
@ -90,31 +64,31 @@ tokens = choice [
skip $ blockComment (exact "{-") (exact "-}"), skip $ blockComment (exact "{-") (exact "-}"),
skip spaces, skip spaces,
simple ["("] $ P LParen, simple [")"] $ P RParen, simple '(' $ P LParen, simple ')' $ P RParen,
simple ["["] $ P LSquare, simple ["]"] $ P RSquare, simple '[' $ P LSquare, simple ']' $ P RSquare,
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, simple '{' $ P LBrace, simple '}' $ P RBrace,
simple [","] $ P Comma, simple ',' $ P Comma,
simple ["::", ""] $ P DblColon, simple '' $ P DblColon,
simple [":"] $ P Colon, -- needs to be after "::" simple ':' $ P Colon, -- needs to be after '::'
simple ["."] $ P Dot, simple '.' $ P Dot,
simple ["->", ""] $ P Arrow, simple '' $ P Arrow,
simple ["=>", ""] $ P DblArrow, simple '' $ P DblArrow,
simple ["**", "×"] $ P Times, simple '×' $ P Times,
simple ["<<", ""] $ P Triangle, simple '' $ P Triangle,
match wild $ const $ P Wild, match wild $ const $ P Wild,
keyword "fun" Fun, keyword "λ" Fun, keyword "λ" Lam,
keyword "let" Let, keyword "in" In, keyword "let" Let, keyword "in" In,
keyword "case" Case, keyword "of" Of, keyword "case" Case, keyword "of" Of,
keyword "ω" Omega, simple ["#"] $ K Omega, keyword "ω" Omega,
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
simple '' $ K TYPE,
match name $ Name, match name $ Name,
match symbol $ Symbol . assert_total strTail, match symbol $ Symbol . assert_total strTail,
match decimal $ N . natToNumber . makeDec, match decimal $ N . natToNumber . cast
match hexadecimal $ N . natToNumber . makeHex . drop 2,
match octal $ N . natToNumber . makeOct . drop 2
] ]

View file

@ -84,17 +84,16 @@ tests = "lexer" :- [
], ],
"punctuation" :- [ "punctuation" :- [
acceptsWith' "({[:,::]})" acceptsWith' "({[:,]})"
[P LParen, P LBrace, P LSquare, [P LParen, P LBrace, P LSquare,
P Colon, P Comma, P DblColon, P Colon, P Comma,
P RSquare, P RBrace, P RParen], P RSquare, P RBrace, P RParen],
acceptsWith' " ( { [ : , :: ] } ) " acceptsWith' " ( { [ : , ] } ) "
[P LParen, P LBrace, P LSquare, [P LParen, P LBrace, P LSquare,
P Colon, P Comma, P DblColon, P Colon, P Comma,
P RSquare, P RBrace, P RParen], P RSquare, P RBrace, P RParen],
acceptsWith' "-> → => ⇒ ** × << ⊲ ∷" acceptsWith' "→ ⇒ × ⊲ ∷"
[P Arrow, P Arrow, P DblArrow, P DblArrow, [P Arrow, P DblArrow, P Times, P Triangle, P DblColon],
P Times, P Times, P Triangle, P Triangle, P DblColon],
acceptsWith' "_" [P Wild] acceptsWith' "_" [P Wild]
], ],
@ -119,14 +118,12 @@ tests = "lexer" :- [
], ],
"keywords" :- [ "keywords" :- [
acceptsWith' "fun" [K Fun], acceptsWith' "λ" [K Lam],
acceptsWith' "λ" [K Fun],
acceptsWith' "let" [K Let], acceptsWith' "let" [K Let],
acceptsWith' "in" [K In], acceptsWith' "in" [K In],
acceptsWith' "case" [K Case], acceptsWith' "case" [K Case],
acceptsWith' "of" [K Of], acceptsWith' "of" [K Of],
acceptsWith' "ω" [K Omega], acceptsWith' "ω" [K Omega],
acceptsWith' "#" [K Omega],
acceptsWith' "funk" [Name "funk"] acceptsWith' "funk" [Name "funk"]
], ],
@ -134,21 +131,6 @@ tests = "lexer" :- [
acceptsWith' "0" [N Zero], acceptsWith' "0" [N Zero],
acceptsWith' "1" [N One], acceptsWith' "1" [N One],
acceptsWith' "2" [N $ Other 2], acceptsWith' "2" [N $ Other 2],
acceptsWith' "69" [N $ Other 69], acceptsWith' "69" [N $ Other 69]
acceptsWith' "1_000" [N $ Other 1000],
acceptsWith' "0o0" [N Zero],
acceptsWith' "0o105" [N $ Other 69],
acceptsWith' "0O0" [N Zero],
acceptsWith' "0O105" [N $ Other 69],
acceptsWith' "0x0" [N Zero],
acceptsWith' "0x45" [N $ Other 69],
acceptsWith' "0xabc" [N $ Other 2748],
acceptsWith' "0xABC" [N $ Other 2748],
acceptsWith' "0xA_BC" [N $ Other 2748],
acceptsWith' "0X0" [N Zero],
acceptsWith' "0X45" [N $ Other 69],
acceptsWith' "0XABC" [N $ Other 2748],
rejects' "1_",
rejects' "1__000"
] ]
] ]