quox/lib/Quox/Lexer.idr
2022-05-09 18:31:30 +02:00

102 lines
2.5 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Quox.Lexer
import public Quox.Token
import Data.String
import Data.String.Extra
import public Text.Lexer
import public Text.Lexer.Tokenizer
import Control.Monad.Either
import Generics.Derive
%default total
%language ElabReflection
public export
record Error where
constructor Err
reason : StopReason
line, col : Int
char : Char
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = is '_' <+> reject nameCont
%hide Text.Lexer.symbol
symbol = is '\'' <+> name
decimal = some digit <+> reject nameCont
natToNumber : Nat -> Number
natToNumber 0 = Zero
natToNumber 1 = One
natToNumber k = Other k
skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing
simple : Char -> a -> Tokenizer (Maybe a)
simple ch = match (is ch) . const . Just
keyword : String -> Keyword -> Tokenizer (Maybe Token)
keyword str = match (exact str <+> reject nameCont) . const . Just . K
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
match lex f = Tokenizer.match lex (Just . f)
%hide Tokenizer.match
tokens : Tokenizer (Maybe Token)
tokens = choice [
skip $ lineComment $ exact "--",
skip $ blockComment (exact "{-") (exact "-}"),
skip spaces,
simple '(' $ P LParen, simple ')' $ P RParen,
simple '[' $ P LSquare, simple ']' $ P RSquare,
simple '{' $ P LBrace, simple '}' $ P RBrace,
simple ',' $ P Comma,
simple '' $ P DblColon,
simple ':' $ P Colon, -- needs to be after '::'
simple '.' $ P Dot,
simple '' $ P Arrow,
simple '' $ P DblArrow,
simple '×' $ P Times,
simple '' $ P Triangle,
match wild $ const $ P Wild,
keyword "λ" Lam,
keyword "let" Let, keyword "in" In,
keyword "case" Case, keyword "of" Of,
keyword "ω" Omega,
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
match name $ Name,
match symbol $ Symbol . assert_total strTail,
match decimal $ N . natToNumber . cast,
match (is '' <+> decimal) $ TYPE . cast . assert_total strTail
]
export
lex : String -> Either Error (List BToken)
lex str =
let (res, (reason, line, col, str)) = lex tokens str in
case reason of
EndInput => Right $ mapMaybe sequence res
_ => let char = assert_total strIndex str 0 in
Left $ Err {reason, line, col, char}