2022-05-02 11:13:13 -04:00
|
|
|
|
module Quox.Lexer
|
|
|
|
|
|
2022-05-04 09:30:08 -04:00
|
|
|
|
import public Quox.Token
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
import Data.String
|
2022-05-03 21:11:37 -04:00
|
|
|
|
import Data.String.Extra
|
2022-05-02 11:13:13 -04:00
|
|
|
|
import public Text.Lexer
|
2022-05-02 20:03:22 -04:00
|
|
|
|
import public Text.Lexer.Tokenizer
|
2022-05-06 15:23:58 -04:00
|
|
|
|
import Control.Monad.Either
|
2022-05-02 16:40:28 -04:00
|
|
|
|
import Generics.Derive
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 16:38:37 -04:00
|
|
|
|
%default total
|
2022-05-02 16:40:28 -04:00
|
|
|
|
%language ElabReflection
|
2022-05-02 16:38:37 -04:00
|
|
|
|
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
public export
|
|
|
|
|
record Error where
|
|
|
|
|
constructor Err
|
2022-05-02 20:03:22 -04:00
|
|
|
|
reason : StopReason
|
2022-05-02 11:13:13 -04:00
|
|
|
|
line, col : Int
|
|
|
|
|
char : Char
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nameStart = pred $ \c => isAlpha c || c == '_'
|
|
|
|
|
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
|
|
|
|
|
|
|
|
|
name = nameStart <+> many nameCont <+> reject nameCont
|
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
wild = is '_' <+> reject nameCont
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
%hide Text.Lexer.symbol
|
2022-05-07 15:26:16 -04:00
|
|
|
|
symbol = is '\'' <+> name
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
decimal = some digit <+> reject nameCont
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-03 18:49:09 -04:00
|
|
|
|
|
|
|
|
|
natToNumber : Nat -> Number
|
|
|
|
|
natToNumber 0 = Zero
|
|
|
|
|
natToNumber 1 = One
|
|
|
|
|
natToNumber k = Other k
|
|
|
|
|
|
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
skip : Lexer -> Tokenizer (Maybe a)
|
|
|
|
|
skip lex = match lex $ const Nothing
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
simple : Char -> a -> Tokenizer (Maybe a)
|
|
|
|
|
simple ch = match (is ch) . const . Just
|
2022-05-04 09:30:52 -04:00
|
|
|
|
|
|
|
|
|
keyword : String -> Keyword -> Tokenizer (Maybe Token)
|
|
|
|
|
keyword str = match (exact str <+> reject nameCont) . const . Just . K
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-04 09:30:37 -04:00
|
|
|
|
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
|
2022-05-02 20:03:22 -04:00
|
|
|
|
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-03 21:11:37 -04:00
|
|
|
|
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
|
|
|
|
|
match lex f = Tokenizer.match lex (Just . f)
|
|
|
|
|
%hide Tokenizer.match
|
|
|
|
|
|
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
tokens : Tokenizer (Maybe Token)
|
|
|
|
|
tokens = choice [
|
|
|
|
|
skip $ lineComment $ exact "--",
|
|
|
|
|
skip $ blockComment (exact "{-") (exact "-}"),
|
|
|
|
|
skip spaces,
|
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
simple '(' $ P LParen, simple ')' $ P RParen,
|
|
|
|
|
simple '[' $ P LSquare, simple ']' $ P RSquare,
|
|
|
|
|
simple '{' $ P LBrace, simple '}' $ P RBrace,
|
|
|
|
|
simple ',' $ P Comma,
|
|
|
|
|
simple '∷' $ P DblColon,
|
|
|
|
|
simple ':' $ P Colon, -- needs to be after '::'
|
|
|
|
|
simple '.' $ P Dot,
|
|
|
|
|
|
|
|
|
|
simple '→' $ P Arrow,
|
|
|
|
|
simple '⇒' $ P DblArrow,
|
|
|
|
|
simple '×' $ P Times,
|
|
|
|
|
simple '⊲' $ P Triangle,
|
2022-05-03 21:11:37 -04:00
|
|
|
|
match wild $ const $ P Wild,
|
2022-05-02 20:03:22 -04:00
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
keyword "λ" Lam,
|
|
|
|
|
keyword "let" Let, keyword "in" In,
|
|
|
|
|
keyword "case" Case, keyword "of" Of,
|
|
|
|
|
keyword "ω" Omega,
|
|
|
|
|
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
|
|
|
|
|
simple '★' $ K TYPE,
|
2022-05-04 09:30:52 -04:00
|
|
|
|
|
2022-05-03 21:11:37 -04:00
|
|
|
|
match name $ Name,
|
|
|
|
|
match symbol $ Symbol . assert_total strTail,
|
2022-05-03 18:49:09 -04:00
|
|
|
|
|
2022-05-07 15:26:16 -04:00
|
|
|
|
match decimal $ N . natToNumber . cast
|
2022-05-02 20:03:22 -04:00
|
|
|
|
]
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-03 21:11:37 -04:00
|
|
|
|
|
2022-05-02 11:13:13 -04:00
|
|
|
|
export
|
2022-05-06 15:58:32 -04:00
|
|
|
|
lex : String -> Either Error (List BToken)
|
2022-05-02 11:13:13 -04:00
|
|
|
|
lex str =
|
2022-05-02 20:03:22 -04:00
|
|
|
|
let (res, (reason, line, col, str)) = lex tokens str in
|
|
|
|
|
case reason of
|
2022-05-06 15:58:32 -04:00
|
|
|
|
EndInput => Right $ mapMaybe sequence res
|
2022-05-02 20:03:22 -04:00
|
|
|
|
_ => let char = assert_total strIndex str 0 in
|
2022-05-06 15:58:32 -04:00
|
|
|
|
Left $ Err {reason, line, col, char}
|