module Quox.Lexer import public Quox.Token import Data.String import Data.String.Extra import public Text.Lexer import public Text.Lexer.Tokenizer import Control.Monad.Either import Generics.Derive %default total %language ElabReflection public export record Error where constructor Err reason : StopReason line, col : Int char : Char nameStart = pred $ \c => isAlpha c || c == '_' nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\'' name = nameStart <+> many nameCont <+> reject nameCont wild = is '_' <+> reject nameCont %hide Text.Lexer.symbol symbol = is '\'' <+> name decimal = some digit <+> reject nameCont natToNumber : Nat -> Number natToNumber 0 = Zero natToNumber 1 = One natToNumber k = Other k skip : Lexer -> Tokenizer (Maybe a) skip lex = match lex $ const Nothing simple : Char -> a -> Tokenizer (Maybe a) simple ch = match (is ch) . const . Just keyword : String -> Keyword -> Tokenizer (Maybe Token) keyword str = match (exact str <+> reject nameCont) . const . Just . K choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a choice (t :: ts) = foldl (\a, b => a <|> b) t ts match : Lexer -> (String -> a) -> Tokenizer (Maybe a) match lex f = Tokenizer.match lex (Just . f) %hide Tokenizer.match tokens : Tokenizer (Maybe Token) tokens = choice [ skip $ lineComment $ exact "--", skip $ blockComment (exact "{-") (exact "-}"), skip spaces, simple '(' $ P LParen, simple ')' $ P RParen, simple '[' $ P LSquare, simple ']' $ P RSquare, simple '{' $ P LBrace, simple '}' $ P RBrace, simple ',' $ P Comma, simple '∷' $ P DblColon, simple ':' $ P Colon, -- needs to be after '::' simple '.' $ P Dot, simple '→' $ P Arrow, simple '⇒' $ P DblArrow, simple '×' $ P Times, simple '⊲' $ P Triangle, match wild $ const $ P Wild, keyword "λ" Lam, keyword "let" Let, keyword "in" In, keyword "case" Case, keyword "of" Of, keyword "ω" Omega, keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W, match name $ Name, match symbol $ Symbol . assert_total strTail, match decimal $ N . natToNumber . cast, match (is '★' <+> decimal) $ TYPE . cast . assert_total strTail ] export lex : String -> Either Error (List BToken) lex str = let (res, (reason, line, col, str)) = lex tokens str in case reason of EndInput => Right $ mapMaybe sequence res _ => let char = assert_total strIndex str 0 in Left $ Err {reason, line, col, char}