128 lines
3.3 KiB
Idris
128 lines
3.3 KiB
Idris
module Quox.Lexer
|
||
|
||
import public Quox.Token
|
||
|
||
import Data.String
|
||
import Data.String.Extra
|
||
import public Text.Lexer
|
||
import public Text.Lexer.Tokenizer
|
||
import Control.Monad.Either
|
||
import Generics.Derive
|
||
|
||
%default total
|
||
%language ElabReflection
|
||
|
||
|
||
public export
|
||
record Error where
|
||
constructor Err
|
||
reason : StopReason
|
||
line, col : Int
|
||
char : Char
|
||
|
||
|
||
|
||
nameStart = pred $ \c => isAlpha c || c == '_'
|
||
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
||
|
||
name = nameStart <+> many nameCont <+> reject nameCont
|
||
|
||
wild = exact "_" <+> reject nameCont
|
||
|
||
%hide Text.Lexer.symbol
|
||
symbol = exact "'" <+> name
|
||
|
||
number : Lexer -> Lexer
|
||
number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont
|
||
|
||
octal = approx "0o" <+> number octDigit
|
||
decimal = number digit
|
||
hexadecimal = approx "0x" <+> number hexDigit
|
||
|
||
|
||
natToNumber : Nat -> Number
|
||
natToNumber 0 = Zero
|
||
natToNumber 1 = One
|
||
natToNumber k = Other k
|
||
|
||
|
||
toHexit : Char -> Nat
|
||
toHexit c = cast $
|
||
if '0' <= c && c <= '9' then
|
||
ord c - ord '0'
|
||
else if 'a' <= c && c <= 'f' then
|
||
ord c - ord 'a' + 10
|
||
else if 'A' <= c && c <= 'F' then
|
||
ord c - ord 'A' + 10
|
||
else 0
|
||
|
||
parameters (base : Nat) (single : Char -> Nat)
|
||
makeNat : Nat -> List Char -> Nat
|
||
makeNat acc [] = acc
|
||
makeNat acc ('_' :: lst) = makeNat acc lst
|
||
makeNat acc (d :: lst) = makeNat (acc * base + single d) lst
|
||
|
||
makeOct = makeNat 8 toHexit 0 . unpack
|
||
makeDec = makeNat 10 toHexit 0 . unpack
|
||
makeHex = makeNat 16 toHexit 0 . unpack
|
||
|
||
|
||
skip : Lexer -> Tokenizer (Maybe a)
|
||
skip lex = match lex $ const Nothing
|
||
|
||
simple : List String -> a -> Tokenizer (Maybe a)
|
||
simple strs = match (choice $ map exact strs) . const . Just
|
||
|
||
keyword : String -> Keyword -> Tokenizer (Maybe Token)
|
||
keyword str = match (exact str <+> reject nameCont) . const . Just . K
|
||
|
||
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
|
||
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
|
||
|
||
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
|
||
match lex f = Tokenizer.match lex (Just . f)
|
||
%hide Tokenizer.match
|
||
|
||
|
||
tokens : Tokenizer (Maybe Token)
|
||
tokens = choice [
|
||
skip $ lineComment $ exact "--",
|
||
skip $ blockComment (exact "{-") (exact "-}"),
|
||
skip spaces,
|
||
|
||
simple ["("] $ P LParen, simple [")"] $ P RParen,
|
||
simple ["["] $ P LSquare, simple ["]"] $ P RSquare,
|
||
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace,
|
||
simple [","] $ P Comma,
|
||
simple ["::", "∷"] $ P DblColon,
|
||
simple [":"] $ P Colon, -- needs to be after "::"
|
||
simple ["."] $ P Dot,
|
||
|
||
simple ["->", "→"] $ P Arrow,
|
||
simple ["=>", "⇒"] $ P DblArrow,
|
||
simple ["**", "×"] $ P Times,
|
||
simple ["<<", "⊲"] $ P Triangle,
|
||
match wild $ const $ P Wild,
|
||
|
||
keyword "fun" Fun, keyword "λ" Fun,
|
||
keyword "let" Let, keyword "in" In,
|
||
keyword "case" Case, keyword "of" Of,
|
||
keyword "ω" Omega, simple ["#"] $ K Omega,
|
||
|
||
match name $ Name,
|
||
match symbol $ Symbol . assert_total strTail,
|
||
|
||
match decimal $ N . natToNumber . makeDec,
|
||
match hexadecimal $ N . natToNumber . makeHex . drop 2,
|
||
match octal $ N . natToNumber . makeOct . drop 2
|
||
]
|
||
|
||
|
||
export
|
||
lex : String -> Either Error (List BToken)
|
||
lex str =
|
||
let (res, (reason, line, col, str)) = lex tokens str in
|
||
case reason of
|
||
EndInput => Right $ mapMaybe sequence res
|
||
_ => let char = assert_total strIndex str 0 in
|
||
Left $ Err {reason, line, col, char}
|