quox/src/Quox/Lexer.idr
2022-05-02 22:38:37 +02:00

131 lines
3.1 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Quox.Lexer
import Quox.Error
import Data.String
import public Text.Lexer
%default total
public export
record Error where
constructor Err
line, col : Int
char : Char
public export
data Punc
= LParen | RParen
| LSquare | RSquare
| LBrace | RBrace
| Comma
| Colon | DblColon
| Arrow | DblArrow
| Times | Triangle
| Wild
puncRepr : Punc -> Nat
puncRepr = \case
LParen => 0; RParen => 1; LSquare => 2; RSquare => 3
LBrace => 4; RBrace => 5; Comma => 5
Colon => 6; DblColon => 7; Arrow => 8; DblArrow => 9
Times => 10; Triangle => 11
Wild => 12
export Eq Punc where (==) = (==) `on` puncRepr
export Ord Punc where compare = compare `on` puncRepr
export
Show Punc where
show = \case
LParen => "'('"; RParen => "')'"; LSquare => "'['"; RSquare => "']'"
LBrace => "'{'"; RBrace => "'}'"; Comma => "','"
Colon => "':'"; DblColon => "'∷'"; Arrow => "'→'"; DblArrow => "'⇒'"
Times => "'×'"; Triangle => "'⊲'"
Wild => "'_'"
public export
data Kind
= P Punc
| Name | Symbol
kindRepr : Kind -> (Nat, Nat)
kindRepr (P p) = (0, puncRepr p)
kindRepr Name = (1, 0)
kindRepr Symbol = (2, 0)
export Eq Kind where (==) = (==) `on` kindRepr
export Ord Kind where compare = compare `on` kindRepr
export
Show Kind where
show (P p) = show p
show Name = "Name"
show Symbol = "Symbol"
export
TokenKind Kind where
TokType (P _) = ()
TokType Name = String
TokType Symbol = String
tokValue (P _) _ = ()
tokValue Name x = x
tokValue Symbol x = assert_total strTail x
Token' = Token (Maybe Kind)
Token = Token Kind
TokenMap' = TokenMap Token'
TokenMap = TokenMap Token
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = exact "_" <+> reject nameCont
%hide Text.Lexer.symbol
symbol = exact "'" <+> name
tokens = toTokenMap [
(lineComment (exact "--"), Nothing),
(blockComment (exact "{-") (exact "-}"), Nothing),
(spaces, Nothing),
(exact "(", Just $ P LParen), (exact ")", Just $ P RParen),
(exact "[", Just $ P LSquare), (exact "]", Just $ P RSquare),
(exact "{", Just $ P LBrace), (exact "}", Just $ P RBrace),
(exact ",", Just $ P Comma),
(exact "::" <|> exact "", Just $ P DblColon),
(exact ":", Just $ P Colon),
(exact "->" <|> exact "", Just $ P Arrow),
(exact "=>" <|> exact "", Just $ P DblArrow),
(exact "**" <|> exact "×", Just $ P Times),
(exact "<<" <|> exact "", Just $ P Triangle),
(wild, Just $ P Wild),
(name, Just Name), (symbol, Just Symbol)
]
sequenceT : Token (Maybe Kind) -> Maybe (Token Kind)
sequenceT tok =
case tok.kind of
Just k => Just $ {kind := k} tok
Nothing => Nothing
export
lex : MonadThrow Error m => String -> m (List (WithBounds Token))
lex str =
let (res, (line, col, str)) = lex tokens str in
case asList str of
[] => pure $ mapMaybe (traverse sequenceT) res
c :: _ => throw $ Err {line, col, char = c}