quox/src/Quox/Lexer.idr

130 lines
3.1 KiB
Idris
Raw Normal View History

2022-05-02 11:13:13 -04:00
module Quox.Lexer
import Quox.Error
import Data.String
import public Text.Lexer
2022-05-02 20:03:22 -04:00
import public Text.Lexer.Tokenizer
2022-05-02 16:40:28 -04:00
import Generics.Derive
2022-05-02 11:13:13 -04:00
2022-05-02 16:38:37 -04:00
%default total
2022-05-02 16:40:28 -04:00
%language ElabReflection
2022-05-02 16:38:37 -04:00
2022-05-02 11:13:13 -04:00
public export
record Error where
constructor Err
2022-05-02 20:03:22 -04:00
reason : StopReason
2022-05-02 11:13:13 -04:00
line, col : Int
char : Char
public export
data Punc
= LParen | RParen
| LSquare | RSquare
| LBrace | RBrace
| Comma
| Colon | DblColon
| Arrow | DblArrow
| Times | Triangle
| Wild
2022-05-02 16:40:28 -04:00
%runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show]
2022-05-02 11:13:13 -04:00
2022-05-03 18:49:09 -04:00
||| zero and one are separate because they are
||| quantity & dimension constants
public export
data Number = Zero | One | Other Nat
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
2022-05-02 11:13:13 -04:00
public export
2022-05-02 20:03:22 -04:00
data Token
2022-05-02 11:13:13 -04:00
= P Punc
2022-05-02 20:03:22 -04:00
| Name String | Symbol String
2022-05-03 18:49:09 -04:00
| N Number
2022-05-02 11:13:13 -04:00
2022-05-02 20:03:22 -04:00
%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show]
2022-05-02 11:13:13 -04:00
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = exact "_" <+> reject nameCont
%hide Text.Lexer.symbol
symbol = exact "'" <+> name
2022-05-03 18:49:09 -04:00
decimal : Lexer
decimal =
digit <+> opt (many (digit <|> is '_') <+> digit)
natToNumber : Nat -> Number
natToNumber 0 = Zero
natToNumber 1 = One
natToNumber k = Other k
toDigit : Char -> Nat
toDigit c = cast $ ord c - ord '0'
makeDec' : Nat -> String -> Maybe Nat
makeDec' acc x with (asList x)
makeDec' acc "" | [] = pure acc
makeDec' acc (strCons '_' str) | '_' :: lst = makeDec' acc str | lst
makeDec' acc (strCons d str) | d :: lst =
if d >= '0' && d <= '9' then
makeDec' (acc * 10 + toDigit d) str | lst
else
Nothing
makeDec = fromMaybe 0 . makeDec' 0
2022-05-02 20:03:22 -04:00
skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing
2022-05-02 11:13:13 -04:00
2022-05-02 20:03:22 -04:00
simple : List String -> a -> Tokenizer (Maybe a)
simple str x = match (choice $ map exact str) $ const $ Just x
2022-05-02 11:13:13 -04:00
2022-05-02 20:03:22 -04:00
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : So (isCons xs)} -> Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
2022-05-02 11:13:13 -04:00
2022-05-02 20:03:22 -04:00
tokens : Tokenizer (Maybe Token)
tokens = choice [
skip $ lineComment $ exact "--",
skip $ blockComment (exact "{-") (exact "-}"),
skip spaces,
simple ["("] $ P LParen, simple [")"] $ P RParen,
simple ["["] $ P LSquare, simple ["]"] $ P RSquare,
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace,
simple [","] $ P Comma,
simple ["::", ""] $ P DblColon,
simple [":"] $ P Colon,
2022-05-02 11:13:13 -04:00
2022-05-02 20:03:22 -04:00
simple ["->", ""] $ P Arrow,
simple ["=>", ""] $ P DblArrow,
simple ["**", "×"] $ P Times,
simple ["<<", ""] $ P Triangle,
match wild $ const $ Just $ P Wild,
match name $ Just . Name,
2022-05-03 18:49:09 -04:00
match symbol $ Just . Symbol . assert_total strTail,
-- [todo] other bases?
match (some $ digit <|> exact "_") $ Just . N . natToNumber . makeDec
2022-05-02 20:03:22 -04:00
]
2022-05-02 11:13:13 -04:00
export
lex : MonadThrow Error m => String -> m (List (WithBounds Token))
lex str =
2022-05-02 20:03:22 -04:00
let (res, (reason, line, col, str)) = lex tokens str in
case reason of
EndInput => pure $ mapMaybe sequence res
_ => let char = assert_total strIndex str 0 in
throw $ Err {reason, line, col, char}