2022-05-02 11:13:13 -04:00
|
|
|
|
module Quox.Lexer
|
|
|
|
|
|
|
|
|
|
import Quox.Error
|
|
|
|
|
|
|
|
|
|
import Data.String
|
|
|
|
|
import public Text.Lexer
|
2022-05-02 20:03:22 -04:00
|
|
|
|
import public Text.Lexer.Tokenizer
|
2022-05-02 16:40:28 -04:00
|
|
|
|
import Generics.Derive
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 16:38:37 -04:00
|
|
|
|
%default total
|
2022-05-02 16:40:28 -04:00
|
|
|
|
%language ElabReflection
|
2022-05-02 16:38:37 -04:00
|
|
|
|
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
public export
|
|
|
|
|
record Error where
|
|
|
|
|
constructor Err
|
2022-05-02 20:03:22 -04:00
|
|
|
|
reason : StopReason
|
2022-05-02 11:13:13 -04:00
|
|
|
|
line, col : Int
|
|
|
|
|
char : Char
|
|
|
|
|
|
|
|
|
|
public export
|
|
|
|
|
data Punc
|
|
|
|
|
= LParen | RParen
|
|
|
|
|
| LSquare | RSquare
|
|
|
|
|
| LBrace | RBrace
|
|
|
|
|
| Comma
|
|
|
|
|
| Colon | DblColon
|
|
|
|
|
| Arrow | DblArrow
|
|
|
|
|
| Times | Triangle
|
|
|
|
|
| Wild
|
|
|
|
|
|
2022-05-02 16:40:28 -04:00
|
|
|
|
%runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show]
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
|
2022-05-03 18:49:09 -04:00
|
|
|
|
||| zero and one are separate because they are
|
|
|
|
|
||| quantity & dimension constants
|
|
|
|
|
public export
|
|
|
|
|
data Number = Zero | One | Other Nat
|
|
|
|
|
|
|
|
|
|
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
|
|
|
|
|
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
public export
|
2022-05-02 20:03:22 -04:00
|
|
|
|
data Token
|
2022-05-02 11:13:13 -04:00
|
|
|
|
= P Punc
|
2022-05-02 20:03:22 -04:00
|
|
|
|
| Name String | Symbol String
|
2022-05-03 18:49:09 -04:00
|
|
|
|
| N Number
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show]
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nameStart = pred $ \c => isAlpha c || c == '_'
|
|
|
|
|
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
|
|
|
|
|
|
|
|
|
name = nameStart <+> many nameCont <+> reject nameCont
|
|
|
|
|
|
|
|
|
|
wild = exact "_" <+> reject nameCont
|
|
|
|
|
|
|
|
|
|
%hide Text.Lexer.symbol
|
|
|
|
|
symbol = exact "'" <+> name
|
|
|
|
|
|
|
|
|
|
|
2022-05-03 18:49:09 -04:00
|
|
|
|
decimal : Lexer
|
|
|
|
|
decimal =
|
|
|
|
|
digit <+> opt (many (digit <|> is '_') <+> digit)
|
|
|
|
|
|
|
|
|
|
natToNumber : Nat -> Number
|
|
|
|
|
natToNumber 0 = Zero
|
|
|
|
|
natToNumber 1 = One
|
|
|
|
|
natToNumber k = Other k
|
|
|
|
|
|
|
|
|
|
toDigit : Char -> Nat
|
|
|
|
|
toDigit c = cast $ ord c - ord '0'
|
|
|
|
|
|
|
|
|
|
makeDec' : Nat -> String -> Maybe Nat
|
|
|
|
|
makeDec' acc x with (asList x)
|
|
|
|
|
makeDec' acc "" | [] = pure acc
|
|
|
|
|
makeDec' acc (strCons '_' str) | '_' :: lst = makeDec' acc str | lst
|
|
|
|
|
makeDec' acc (strCons d str) | d :: lst =
|
|
|
|
|
if d >= '0' && d <= '9' then
|
|
|
|
|
makeDec' (acc * 10 + toDigit d) str | lst
|
|
|
|
|
else
|
|
|
|
|
Nothing
|
|
|
|
|
|
|
|
|
|
makeDec = fromMaybe 0 . makeDec' 0
|
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
skip : Lexer -> Tokenizer (Maybe a)
|
|
|
|
|
skip lex = match lex $ const Nothing
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
simple : List String -> a -> Tokenizer (Maybe a)
|
|
|
|
|
simple str x = match (choice $ map exact str) $ const $ Just x
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : So (isCons xs)} -> Tokenizer a
|
|
|
|
|
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
tokens : Tokenizer (Maybe Token)
|
|
|
|
|
tokens = choice [
|
|
|
|
|
skip $ lineComment $ exact "--",
|
|
|
|
|
skip $ blockComment (exact "{-") (exact "-}"),
|
|
|
|
|
skip spaces,
|
|
|
|
|
|
|
|
|
|
simple ["("] $ P LParen, simple [")"] $ P RParen,
|
|
|
|
|
simple ["["] $ P LSquare, simple ["]"] $ P RSquare,
|
|
|
|
|
simple ["{"] $ P LBrace, simple ["}"] $ P RBrace,
|
|
|
|
|
simple [","] $ P Comma,
|
|
|
|
|
simple ["::", "∷"] $ P DblColon,
|
|
|
|
|
simple [":"] $ P Colon,
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
2022-05-02 20:03:22 -04:00
|
|
|
|
simple ["->", "→"] $ P Arrow,
|
|
|
|
|
simple ["=>", "⇒"] $ P DblArrow,
|
|
|
|
|
simple ["**", "×"] $ P Times,
|
|
|
|
|
simple ["<<", "⊲"] $ P Triangle,
|
|
|
|
|
match wild $ const $ Just $ P Wild,
|
|
|
|
|
|
|
|
|
|
match name $ Just . Name,
|
2022-05-03 18:49:09 -04:00
|
|
|
|
match symbol $ Just . Symbol . assert_total strTail,
|
|
|
|
|
|
|
|
|
|
-- [todo] other bases?
|
|
|
|
|
match (some $ digit <|> exact "_") $ Just . N . natToNumber . makeDec
|
2022-05-02 20:03:22 -04:00
|
|
|
|
]
|
2022-05-02 11:13:13 -04:00
|
|
|
|
|
|
|
|
|
export
|
|
|
|
|
lex : MonadThrow Error m => String -> m (List (WithBounds Token))
|
|
|
|
|
lex str =
|
2022-05-02 20:03:22 -04:00
|
|
|
|
let (res, (reason, line, col, str)) = lex tokens str in
|
|
|
|
|
case reason of
|
|
|
|
|
EndInput => pure $ mapMaybe sequence res
|
|
|
|
|
_ => let char = assert_total strIndex str 0 in
|
|
|
|
|
throw $ Err {reason, line, col, char}
|