simplify lexical grammar
this is a core language after all
This commit is contained in:
parent
1c8b2b205b
commit
809a0fc859
2 changed files with 31 additions and 75 deletions
|
@ -27,17 +27,12 @@ nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
||||||
|
|
||||||
name = nameStart <+> many nameCont <+> reject nameCont
|
name = nameStart <+> many nameCont <+> reject nameCont
|
||||||
|
|
||||||
wild = exact "_" <+> reject nameCont
|
wild = is '_' <+> reject nameCont
|
||||||
|
|
||||||
%hide Text.Lexer.symbol
|
%hide Text.Lexer.symbol
|
||||||
symbol = exact "'" <+> name
|
symbol = is '\'' <+> name
|
||||||
|
|
||||||
number : Lexer -> Lexer
|
decimal = some digit <+> reject nameCont
|
||||||
number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont
|
|
||||||
|
|
||||||
octal = approx "0o" <+> number octDigit
|
|
||||||
decimal = number digit
|
|
||||||
hexadecimal = approx "0x" <+> number hexDigit
|
|
||||||
|
|
||||||
|
|
||||||
natToNumber : Nat -> Number
|
natToNumber : Nat -> Number
|
||||||
|
@ -46,32 +41,11 @@ natToNumber 1 = One
|
||||||
natToNumber k = Other k
|
natToNumber k = Other k
|
||||||
|
|
||||||
|
|
||||||
toHexit : Char -> Nat
|
|
||||||
toHexit c = cast $
|
|
||||||
if '0' <= c && c <= '9' then
|
|
||||||
ord c - ord '0'
|
|
||||||
else if 'a' <= c && c <= 'f' then
|
|
||||||
ord c - ord 'a' + 10
|
|
||||||
else if 'A' <= c && c <= 'F' then
|
|
||||||
ord c - ord 'A' + 10
|
|
||||||
else 0
|
|
||||||
|
|
||||||
parameters (base : Nat) (single : Char -> Nat)
|
|
||||||
makeNat : Nat -> List Char -> Nat
|
|
||||||
makeNat acc [] = acc
|
|
||||||
makeNat acc ('_' :: lst) = makeNat acc lst
|
|
||||||
makeNat acc (d :: lst) = makeNat (acc * base + single d) lst
|
|
||||||
|
|
||||||
makeOct = makeNat 8 toHexit 0 . unpack
|
|
||||||
makeDec = makeNat 10 toHexit 0 . unpack
|
|
||||||
makeHex = makeNat 16 toHexit 0 . unpack
|
|
||||||
|
|
||||||
|
|
||||||
skip : Lexer -> Tokenizer (Maybe a)
|
skip : Lexer -> Tokenizer (Maybe a)
|
||||||
skip lex = match lex $ const Nothing
|
skip lex = match lex $ const Nothing
|
||||||
|
|
||||||
simple : List String -> a -> Tokenizer (Maybe a)
|
simple : Char -> a -> Tokenizer (Maybe a)
|
||||||
simple strs = match (choice $ map exact strs) . const . Just
|
simple ch = match (is ch) . const . Just
|
||||||
|
|
||||||
keyword : String -> Keyword -> Tokenizer (Maybe Token)
|
keyword : String -> Keyword -> Tokenizer (Maybe Token)
|
||||||
keyword str = match (exact str <+> reject nameCont) . const . Just . K
|
keyword str = match (exact str <+> reject nameCont) . const . Just . K
|
||||||
|
@ -90,31 +64,31 @@ tokens = choice [
|
||||||
skip $ blockComment (exact "{-") (exact "-}"),
|
skip $ blockComment (exact "{-") (exact "-}"),
|
||||||
skip spaces,
|
skip spaces,
|
||||||
|
|
||||||
simple ["("] $ P LParen, simple [")"] $ P RParen,
|
simple '(' $ P LParen, simple ')' $ P RParen,
|
||||||
simple ["["] $ P LSquare, simple ["]"] $ P RSquare,
|
simple '[' $ P LSquare, simple ']' $ P RSquare,
|
||||||
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace,
|
simple '{' $ P LBrace, simple '}' $ P RBrace,
|
||||||
simple [","] $ P Comma,
|
simple ',' $ P Comma,
|
||||||
simple ["::", "∷"] $ P DblColon,
|
simple '∷' $ P DblColon,
|
||||||
simple [":"] $ P Colon, -- needs to be after "::"
|
simple ':' $ P Colon, -- needs to be after '::'
|
||||||
simple ["."] $ P Dot,
|
simple '.' $ P Dot,
|
||||||
|
|
||||||
simple ["->", "→"] $ P Arrow,
|
simple '→' $ P Arrow,
|
||||||
simple ["=>", "⇒"] $ P DblArrow,
|
simple '⇒' $ P DblArrow,
|
||||||
simple ["**", "×"] $ P Times,
|
simple '×' $ P Times,
|
||||||
simple ["<<", "⊲"] $ P Triangle,
|
simple '⊲' $ P Triangle,
|
||||||
match wild $ const $ P Wild,
|
match wild $ const $ P Wild,
|
||||||
|
|
||||||
keyword "fun" Fun, keyword "λ" Fun,
|
keyword "λ" Lam,
|
||||||
keyword "let" Let, keyword "in" In,
|
keyword "let" Let, keyword "in" In,
|
||||||
keyword "case" Case, keyword "of" Of,
|
keyword "case" Case, keyword "of" Of,
|
||||||
keyword "ω" Omega, simple ["#"] $ K Omega,
|
keyword "ω" Omega,
|
||||||
|
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
|
||||||
|
simple '★' $ K TYPE,
|
||||||
|
|
||||||
match name $ Name,
|
match name $ Name,
|
||||||
match symbol $ Symbol . assert_total strTail,
|
match symbol $ Symbol . assert_total strTail,
|
||||||
|
|
||||||
match decimal $ N . natToNumber . makeDec,
|
match decimal $ N . natToNumber . cast
|
||||||
match hexadecimal $ N . natToNumber . makeHex . drop 2,
|
|
||||||
match octal $ N . natToNumber . makeOct . drop 2
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -84,17 +84,16 @@ tests = "lexer" :- [
|
||||||
],
|
],
|
||||||
|
|
||||||
"punctuation" :- [
|
"punctuation" :- [
|
||||||
acceptsWith' "({[:,::]})"
|
acceptsWith' "({[:,]})"
|
||||||
[P LParen, P LBrace, P LSquare,
|
[P LParen, P LBrace, P LSquare,
|
||||||
P Colon, P Comma, P DblColon,
|
P Colon, P Comma,
|
||||||
P RSquare, P RBrace, P RParen],
|
P RSquare, P RBrace, P RParen],
|
||||||
acceptsWith' " ( { [ : , :: ] } ) "
|
acceptsWith' " ( { [ : , ] } ) "
|
||||||
[P LParen, P LBrace, P LSquare,
|
[P LParen, P LBrace, P LSquare,
|
||||||
P Colon, P Comma, P DblColon,
|
P Colon, P Comma,
|
||||||
P RSquare, P RBrace, P RParen],
|
P RSquare, P RBrace, P RParen],
|
||||||
acceptsWith' "-> → => ⇒ ** × << ⊲ ∷"
|
acceptsWith' "→ ⇒ × ⊲ ∷"
|
||||||
[P Arrow, P Arrow, P DblArrow, P DblArrow,
|
[P Arrow, P DblArrow, P Times, P Triangle, P DblColon],
|
||||||
P Times, P Times, P Triangle, P Triangle, P DblColon],
|
|
||||||
acceptsWith' "_" [P Wild]
|
acceptsWith' "_" [P Wild]
|
||||||
],
|
],
|
||||||
|
|
||||||
|
@ -119,14 +118,12 @@ tests = "lexer" :- [
|
||||||
],
|
],
|
||||||
|
|
||||||
"keywords" :- [
|
"keywords" :- [
|
||||||
acceptsWith' "fun" [K Fun],
|
acceptsWith' "λ" [K Lam],
|
||||||
acceptsWith' "λ" [K Fun],
|
|
||||||
acceptsWith' "let" [K Let],
|
acceptsWith' "let" [K Let],
|
||||||
acceptsWith' "in" [K In],
|
acceptsWith' "in" [K In],
|
||||||
acceptsWith' "case" [K Case],
|
acceptsWith' "case" [K Case],
|
||||||
acceptsWith' "of" [K Of],
|
acceptsWith' "of" [K Of],
|
||||||
acceptsWith' "ω" [K Omega],
|
acceptsWith' "ω" [K Omega],
|
||||||
acceptsWith' "#" [K Omega],
|
|
||||||
acceptsWith' "funk" [Name "funk"]
|
acceptsWith' "funk" [Name "funk"]
|
||||||
],
|
],
|
||||||
|
|
||||||
|
@ -134,21 +131,6 @@ tests = "lexer" :- [
|
||||||
acceptsWith' "0" [N Zero],
|
acceptsWith' "0" [N Zero],
|
||||||
acceptsWith' "1" [N One],
|
acceptsWith' "1" [N One],
|
||||||
acceptsWith' "2" [N $ Other 2],
|
acceptsWith' "2" [N $ Other 2],
|
||||||
acceptsWith' "69" [N $ Other 69],
|
acceptsWith' "69" [N $ Other 69]
|
||||||
acceptsWith' "1_000" [N $ Other 1000],
|
|
||||||
acceptsWith' "0o0" [N Zero],
|
|
||||||
acceptsWith' "0o105" [N $ Other 69],
|
|
||||||
acceptsWith' "0O0" [N Zero],
|
|
||||||
acceptsWith' "0O105" [N $ Other 69],
|
|
||||||
acceptsWith' "0x0" [N Zero],
|
|
||||||
acceptsWith' "0x45" [N $ Other 69],
|
|
||||||
acceptsWith' "0xabc" [N $ Other 2748],
|
|
||||||
acceptsWith' "0xABC" [N $ Other 2748],
|
|
||||||
acceptsWith' "0xA_BC" [N $ Other 2748],
|
|
||||||
acceptsWith' "0X0" [N Zero],
|
|
||||||
acceptsWith' "0X45" [N $ Other 69],
|
|
||||||
acceptsWith' "0XABC" [N $ Other 2748],
|
|
||||||
rejects' "1_",
|
|
||||||
rejects' "1__000"
|
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in a new issue