quox/lib/Quox/Lexer.idr

module Quox.Lexer
import public Quox.Token
import Data.String
import Data.String.Extra
import public Text.Lexer
import public Text.Lexer.Tokenizer
import Control.Monad.Either
import Generics.Derive
%default total
%language ElabReflection
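
-- an error from the lexer: the reason it stopped, the line and column,
-- and the character it stopped on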
public export
record Error where
  constructor Err
  reason : StopReason
  line, col : Int
  char : Char
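
-- lexers for names: a name starts with a letter or '_' and continues with
-- letters, digits, '_', or '\''; a bare '_' is a wildcard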
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = is '_' <+> reject nameCont
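
-- a symbol is a single quote followed by a name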
%hide Text.Lexer.symbol
symbol = is '\'' <+> name
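
-- a decimal literal is one or more digits, not followed by a name character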
decimal = some digit <+> reject nameCont
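
-- convert a parsed natural to the Number token type, with dedicated
-- constructors for 0 and 1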
natToNumber : Nat -> Number
natToNumber 0 = Zero
natToNumber 1 = One
natToNumber k = Other k
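
-- lex something and discard it (whitespace and comments)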
skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing
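
-- a token made of a single fixed character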
simple : Char -> a -> Tokenizer (Maybe a)
simple ch = match (is ch) . const . Just
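
-- a keyword, which must not be followed directly by a name character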
keyword : String -> Keyword -> Tokenizer (Maybe Token)
keyword str = match (exact str <+> reject nameCont) . const . Just . K
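
-- try a non-empty list of tokenizers in order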
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
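
-- like Tokenizer.match, but wrapping the result in Just; the original is
-- hidden afterwards so the rest of the module uses this version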
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
match lex f = Tokenizer.match lex (Just . f)
%hide Tokenizer.match
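
-- the tokenizer for the whole language; entries that produce Nothing are
-- dropped from the final token list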
tokens : Tokenizer (Maybe Token)
tokens = choice [
  skip $ lineComment $ exact "--",
  skip $ blockComment (exact "{-") (exact "-}"),
  skip spaces,
  simple '(' $ P LParen, simple ')' $ P RParen,
  simple '[' $ P LSquare, simple ']' $ P RSquare,
  simple '{' $ P LBrace, simple '}' $ P RBrace,
  simple ',' $ P Comma,
  simple '∷' $ P DblColon,
  simple ':' $ P Colon, -- needs to be after '::'
  simple '.' $ P Dot,
  simple '→' $ P Arrow,
  simple '⇒' $ P DblArrow,
  simple '×' $ P Times,
  simple '◃' $ P Triangle,

  match wild $ const $ P Wild,

  keyword "λ" Lam,
  keyword "let" Let, keyword "in" In,
  keyword "case" Case, keyword "of" Of,
  keyword "ω" Omega,
  keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,

  match name $ Name,
  match symbol $ Symbol . assert_total strTail,

  match decimal $ N . natToNumber . cast,
  match (is '★' <+> decimal) $ TYPE . cast . assert_total strTail
]
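
-- lex an entire string, returning the tokens with their bounds, or an error
-- if lexing stopped before the end of the input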
export
lex : String -> Either Error (List BToken)
lex str =
  let (res, (reason, line, col, str)) = lex tokens str in
  case reason of
    EndInput => Right $ mapMaybe sequence res
    _ => let char = assert_total strIndex str 0 in
         Left $ Err {reason, line, col, char}