131 lines
3.1 KiB
Idris
131 lines
3.1 KiB
Idris
module Quox.Lexer
|
||
|
||
import Quox.Error
|
||
|
||
import Data.String
|
||
import public Text.Lexer
|
||
|
||
%default total
|
||
|
||
|
||
public export
|
||
record Error where
|
||
constructor Err
|
||
line, col : Int
|
||
char : Char
|
||
|
||
public export
|
||
data Punc
|
||
= LParen | RParen
|
||
| LSquare | RSquare
|
||
| LBrace | RBrace
|
||
| Comma
|
||
| Colon | DblColon
|
||
| Arrow | DblArrow
|
||
| Times | Triangle
|
||
| Wild
|
||
|
||
puncRepr : Punc -> Nat
|
||
puncRepr = \case
|
||
LParen => 0; RParen => 1; LSquare => 2; RSquare => 3
|
||
LBrace => 4; RBrace => 5; Comma => 5
|
||
Colon => 6; DblColon => 7; Arrow => 8; DblArrow => 9
|
||
Times => 10; Triangle => 11
|
||
Wild => 12
|
||
|
||
export Eq Punc where (==) = (==) `on` puncRepr
|
||
export Ord Punc where compare = compare `on` puncRepr
|
||
|
||
export
|
||
Show Punc where
|
||
show = \case
|
||
LParen => "'('"; RParen => "')'"; LSquare => "'['"; RSquare => "']'"
|
||
LBrace => "'{'"; RBrace => "'}'"; Comma => "','"
|
||
Colon => "':'"; DblColon => "'∷'"; Arrow => "'→'"; DblArrow => "'⇒'"
|
||
Times => "'×'"; Triangle => "'⊲'"
|
||
Wild => "'_'"
|
||
|
||
|
||
|
||
public export
|
||
data Kind
|
||
= P Punc
|
||
| Name | Symbol
|
||
|
||
kindRepr : Kind -> (Nat, Nat)
|
||
kindRepr (P p) = (0, puncRepr p)
|
||
kindRepr Name = (1, 0)
|
||
kindRepr Symbol = (2, 0)
|
||
|
||
export Eq Kind where (==) = (==) `on` kindRepr
|
||
export Ord Kind where compare = compare `on` kindRepr
|
||
|
||
export
|
||
Show Kind where
|
||
show (P p) = show p
|
||
show Name = "Name"
|
||
show Symbol = "Symbol"
|
||
|
||
|
||
export
|
||
TokenKind Kind where
|
||
TokType (P _) = ()
|
||
TokType Name = String
|
||
TokType Symbol = String
|
||
|
||
tokValue (P _) _ = ()
|
||
tokValue Name x = x
|
||
tokValue Symbol x = assert_total strTail x
|
||
|
||
|
||
Token' = Token (Maybe Kind)
|
||
Token = Token Kind
|
||
|
||
TokenMap' = TokenMap Token'
|
||
TokenMap = TokenMap Token
|
||
|
||
nameStart = pred $ \c => isAlpha c || c == '_'
|
||
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
||
|
||
name = nameStart <+> many nameCont <+> reject nameCont
|
||
|
||
wild = exact "_" <+> reject nameCont
|
||
|
||
%hide Text.Lexer.symbol
|
||
symbol = exact "'" <+> name
|
||
|
||
|
||
tokens = toTokenMap [
|
||
(lineComment (exact "--"), Nothing),
|
||
(blockComment (exact "{-") (exact "-}"), Nothing),
|
||
(spaces, Nothing),
|
||
|
||
(exact "(", Just $ P LParen), (exact ")", Just $ P RParen),
|
||
(exact "[", Just $ P LSquare), (exact "]", Just $ P RSquare),
|
||
(exact "{", Just $ P LBrace), (exact "}", Just $ P RBrace),
|
||
(exact ",", Just $ P Comma),
|
||
(exact "::" <|> exact "∷", Just $ P DblColon),
|
||
(exact ":", Just $ P Colon),
|
||
|
||
(exact "->" <|> exact "→", Just $ P Arrow),
|
||
(exact "=>" <|> exact "⇒", Just $ P DblArrow),
|
||
(exact "**" <|> exact "×", Just $ P Times),
|
||
(exact "<<" <|> exact "⊲", Just $ P Triangle),
|
||
(wild, Just $ P Wild),
|
||
|
||
(name, Just Name), (symbol, Just Symbol)
|
||
]
|
||
|
||
sequenceT : Token (Maybe Kind) -> Maybe (Token Kind)
|
||
sequenceT tok =
|
||
case tok.kind of
|
||
Just k => Just $ {kind := k} tok
|
||
Nothing => Nothing
|
||
|
||
export
|
||
lex : MonadThrow Error m => String -> m (List (WithBounds Token))
|
||
lex str =
|
||
let (res, (line, col, str)) = lex tokens str in
|
||
case asList str of
|
||
[] => pure $ mapMaybe (traverse sequenceT) res
|
||
c :: _ => throw $ Err {line, col, char = c}
|