module Quox.Lexer import public Quox.Token import Data.String import Data.String.Extra import public Text.Lexer import public Text.Lexer.Tokenizer import Control.Monad.Either import Generics.Derive %default total %language ElabReflection public export record Error where constructor Err reason : StopReason line, col : Int char : Char nameStart = pred $ \c => isAlpha c || c == '_' nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\'' name = nameStart <+> many nameCont <+> reject nameCont wild = exact "_" <+> reject nameCont %hide Text.Lexer.symbol symbol = exact "'" <+> name number : Lexer -> Lexer number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont octal = approx "0o" <+> number octDigit decimal = number digit hexadecimal = approx "0x" <+> number hexDigit natToNumber : Nat -> Number natToNumber 0 = Zero natToNumber 1 = One natToNumber k = Other k toHexit : Char -> Nat toHexit c = cast $ if '0' <= c && c <= '9' then ord c - ord '0' else if 'a' <= c && c <= 'f' then ord c - ord 'a' + 10 else if 'A' <= c && c <= 'F' then ord c - ord 'A' + 10 else 0 parameters (base : Nat) (single : Char -> Nat) makeNat : Nat -> List Char -> Nat makeNat acc [] = acc makeNat acc ('_' :: lst) = makeNat acc lst makeNat acc (d :: lst) = makeNat (acc * base + single d) lst makeOct = makeNat 8 toHexit 0 . unpack makeDec = makeNat 10 toHexit 0 . unpack makeHex = makeNat 16 toHexit 0 . unpack skip : Lexer -> Tokenizer (Maybe a) skip lex = match lex $ const Nothing simple : List String -> a -> Tokenizer (Maybe a) simple strs = match (choice $ map exact strs) . const . Just keyword : String -> Keyword -> Tokenizer (Maybe Token) keyword str = match (exact str <+> reject nameCont) . const . Just . K choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a choice (t :: ts) = foldl (\a, b => a <|> b) t ts match : Lexer -> (String -> a) -> Tokenizer (Maybe a) match lex f = Tokenizer.match lex (Just . f) %hide Tokenizer.match tokens : Tokenizer (Maybe Token) tokens = choice [ skip $ lineComment $ exact "--", skip $ blockComment (exact "{-") (exact "-}"), skip spaces, simple ["("] $ P LParen, simple [")"] $ P RParen, simple ["["] $ P LSquare, simple ["]"] $ P RSquare, simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, simple [","] $ P Comma, simple ["::", "∷"] $ P DblColon, simple [":"] $ P Colon, -- needs to be after "::" simple ["."] $ P Dot, simple ["->", "→"] $ P Arrow, simple ["=>", "⇒"] $ P DblArrow, simple ["**", "×"] $ P Times, simple ["<<", "⊲"] $ P Triangle, match wild $ const $ P Wild, keyword "fun" Fun, keyword "λ" Fun, keyword "let" Let, keyword "in" In, keyword "case" Case, keyword "of" Of, keyword "ω" Omega, simple ["#"] $ K Omega, match name $ Name, match symbol $ Symbol . assert_total strTail, match decimal $ N . natToNumber . makeDec, match hexadecimal $ N . natToNumber . makeHex . drop 2, match octal $ N . natToNumber . makeOct . drop 2 ] export lex : MonadError Error m => String -> m (List (WithBounds Token)) lex str = let (res, (reason, line, col, str)) = lex tokens str in case reason of EndInput => pure $ mapMaybe sequence res _ => let char = assert_total strIndex str 0 in throwError $ Err {reason, line, col, char}