quox/lib/Quox/Lexer.idr

128 lines
3.3 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Quox.Lexer
import public Quox.Token
import Data.String
import Data.String.Extra
import public Text.Lexer
import public Text.Lexer.Tokenizer
import Control.Monad.Either
import Generics.Derive
%default total
%language ElabReflection
public export
record Error where
constructor Err
reason : StopReason
line, col : Int
char : Char
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = exact "_" <+> reject nameCont
%hide Text.Lexer.symbol
symbol = exact "'" <+> name
number : Lexer -> Lexer
number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont
octal = approx "0o" <+> number octDigit
decimal = number digit
hexadecimal = approx "0x" <+> number hexDigit
natToNumber : Nat -> Number
natToNumber 0 = Zero
natToNumber 1 = One
natToNumber k = Other k
toHexit : Char -> Nat
toHexit c = cast $
if '0' <= c && c <= '9' then
ord c - ord '0'
else if 'a' <= c && c <= 'f' then
ord c - ord 'a' + 10
else if 'A' <= c && c <= 'F' then
ord c - ord 'A' + 10
else 0
parameters (base : Nat) (single : Char -> Nat)
makeNat : Nat -> List Char -> Nat
makeNat acc [] = acc
makeNat acc ('_' :: lst) = makeNat acc lst
makeNat acc (d :: lst) = makeNat (acc * base + single d) lst
makeOct = makeNat 8 toHexit 0 . unpack
makeDec = makeNat 10 toHexit 0 . unpack
makeHex = makeNat 16 toHexit 0 . unpack
skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing
simple : List String -> a -> Tokenizer (Maybe a)
simple strs = match (choice $ map exact strs) . const . Just
keyword : String -> Keyword -> Tokenizer (Maybe Token)
keyword str = match (exact str <+> reject nameCont) . const . Just . K
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
match lex f = Tokenizer.match lex (Just . f)
%hide Tokenizer.match
tokens : Tokenizer (Maybe Token)
tokens = choice [
skip $ lineComment $ exact "--",
skip $ blockComment (exact "{-") (exact "-}"),
skip spaces,
simple ["("] $ P LParen, simple [")"] $ P RParen,
simple ["["] $ P LSquare, simple ["]"] $ P RSquare,
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace,
simple [","] $ P Comma,
simple ["::", ""] $ P DblColon,
simple [":"] $ P Colon, -- needs to be after "::"
simple ["."] $ P Dot,
simple ["->", ""] $ P Arrow,
simple ["=>", ""] $ P DblArrow,
simple ["**", "×"] $ P Times,
simple ["<<", ""] $ P Triangle,
match wild $ const $ P Wild,
keyword "fun" Fun, keyword "λ" Fun,
keyword "let" Let, keyword "in" In,
keyword "case" Case, keyword "of" Of,
keyword "ω" Omega, simple ["#"] $ K Omega,
match name $ Name,
match symbol $ Symbol . assert_total strTail,
match decimal $ N . natToNumber . makeDec,
match hexadecimal $ N . natToNumber . makeHex . drop 2,
match octal $ N . natToNumber . makeOct . drop 2
]
export
lex : String -> Either Error (List BToken)
lex str =
let (res, (reason, line, col, str)) = lex tokens str in
case reason of
EndInput => Right $ mapMaybe sequence res
_ => let char = assert_total strIndex str 0 in
Left $ Err {reason, line, col, char}