start some lexer stuff

This commit is contained in:
rhiannon morris 2022-05-02 17:13:13 +02:00
parent df3505a04c
commit f503cf5734
4 changed files with 239 additions and 1 deletions

View file

@ -28,6 +28,7 @@ modules =
Quox.Syntax.Term.Subst,
Quox.Syntax.Universe,
Quox.Syntax.Var,
Quox.Lexer,
Quox.Context,
Quox.Equal,

129
src/Quox/Lexer.idr Normal file
View file

@ -0,0 +1,129 @@
module Quox.Lexer
import Quox.Error
import Data.String
import public Text.Lexer
public export
record Error where
constructor Err
line, col : Int
char : Char
public export
data Punc
= LParen | RParen
| LSquare | RSquare
| LBrace | RBrace
| Comma
| Colon | DblColon
| Arrow | DblArrow
| Times | Triangle
| Wild
puncRepr : Punc -> Nat
puncRepr = \case
LParen => 0; RParen => 1; LSquare => 2; RSquare => 3
LBrace => 4; RBrace => 5; Comma => 5
Colon => 6; DblColon => 7; Arrow => 8; DblArrow => 9
Times => 10; Triangle => 11
Wild => 12
export Eq Punc where (==) = (==) `on` puncRepr
export Ord Punc where compare = compare `on` puncRepr
export
Show Punc where
show = \case
LParen => "'('"; RParen => "')'"; LSquare => "'['"; RSquare => "']'"
LBrace => "'{'"; RBrace => "'}'"; Comma => "','"
Colon => "':'"; DblColon => "'∷'"; Arrow => "'→'"; DblArrow => "'⇒'"
Times => "'×'"; Triangle => "'⊲'"
Wild => "'_'"
public export
data Kind
= P Punc
| Name | Symbol
kindRepr : Kind -> (Nat, Nat)
kindRepr (P p) = (0, puncRepr p)
kindRepr Name = (1, 0)
kindRepr Symbol = (2, 0)
export Eq Kind where (==) = (==) `on` kindRepr
export Ord Kind where compare = compare `on` kindRepr
export
Show Kind where
show (P p) = show p
show Name = "Name"
show Symbol = "Symbol"
export
TokenKind Kind where
TokType (P _) = ()
TokType Name = String
TokType Symbol = String
tokValue (P _) _ = ()
tokValue Name x = x
tokValue Symbol x = assert_total strTail x
Token' = Token (Maybe Kind)
Token = Token Kind
TokenMap' = TokenMap Token'
TokenMap = TokenMap Token
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = exact "_" <+> reject nameCont
%hide Text.Lexer.symbol
symbol = exact "'" <+> name
tokens = toTokenMap [
(lineComment (exact "--"), Nothing),
(blockComment (exact "{-") (exact "-}"), Nothing),
(spaces, Nothing),
(exact "(", Just $ P LParen), (exact ")", Just $ P RParen),
(exact "[", Just $ P LSquare), (exact "]", Just $ P RSquare),
(exact "{", Just $ P LBrace), (exact "}", Just $ P RBrace),
(exact ",", Just $ P Comma),
(exact "::" <|> exact "", Just $ P DblColon),
(exact ":", Just $ P Colon),
(exact "->" <|> exact "", Just $ P Arrow),
(exact "=>" <|> exact "", Just $ P DblArrow),
(exact "**" <|> exact "×", Just $ P Times),
(exact "<<" <|> exact "", Just $ P Triangle),
(wild, Just $ P Wild),
(name, Just Name), (symbol, Just Symbol)
]
sequenceT : Token (Maybe Kind) -> Maybe (Token Kind)
sequenceT tok =
case tok.kind of
Just k => Just $ {kind := k} tok
Nothing => Nothing
export
lex : MonadThrow Error m => String -> m (List (WithBounds Token))
lex str =
let (res, (line, col, str)) = lex tokens str in
case asList str of
[] => pure $ mapMaybe (traverse sequenceT) res
c :: _ => throw $ Err {line, col, char = c}

View file

@ -7,7 +7,10 @@ import Tests.Equal
import System
allTests = [Equal.tests]
allTests = [
Lexer.tests,
skip Equal.tests
]
main = do
opts <- getTestOpts

105
tests/Tests/Lexer.idr Normal file
View file

@ -0,0 +1,105 @@
module Tests.Lexer
import Quox.Lexer
import TAP
export
ToInfo Error where
toInfo (Err line col char) =
[("line", show line), ("col", show col), ("char", show char)]
data ExtraError
= WrongAnswer (List Kind) (List Kind)
| TestFailed (List Kind)
ToInfo ExtraError where
toInfo (WrongAnswer exp got) =
[("expected", show exp), ("received", show got)]
toInfo (TestFailed got) = [("failed", show got)]
parameters (label : String) (input : String)
acceptsSuchThat' : (List Kind -> Maybe ExtraError) -> Test
acceptsSuchThat' p = test {es = [Lexer.Error, ExtraError]} label $ do
res <- map (kind . val) <$> lex input
case p res of
Just err => throw err
Nothing => pure ()
acceptsSuchThat : (List Kind -> Bool) -> Test
acceptsSuchThat p = acceptsSuchThat' $ \res =>
if p res then Nothing else Just $ TestFailed res
acceptsWith : List Kind -> Test
acceptsWith expect = acceptsSuchThat' $ \res =>
if res == expect then Nothing else Just $ WrongAnswer expect res
accepts : Test
accepts = acceptsSuchThat $ const True
rejects : Test
rejects = testThrows {es = [Lexer.Error]} label $ delay $
map (kind . val) <$> lex input
parameters (input : String) {default False esc : Bool}
show' : String -> String
show' s = if esc then show s else "\"\{s}\""
acceptsWith' : List Kind -> Test
acceptsWith' = acceptsWith (show' input) input
accepts' : Test
accepts' = accepts (show' input) input
rejects' : Test
rejects' = rejects "\{show' input} (reject)" input
tests = "lexer" :- [
"comments" :- [
acceptsWith' "" [],
acceptsWith' " \n \t\t " [] {esc = True},
acceptsWith' "-- a" [],
acceptsWith' "{- -}" [],
acceptsWith' "{--}" [],
acceptsWith' "{------}" [],
acceptsWith' "{- {- -} -}" [],
acceptsWith' "{- } -}" [],
rejects' "{-}",
rejects' "{- {- -}",
acceptsWith' "( -- comment \n )" [P LParen, P RParen] {esc = True}
],
"punctuation" :- [
acceptsWith' "({[:,::]})"
[P LParen, P LBrace, P LSquare,
P Colon, P Comma, P DblColon,
P RSquare, P RBrace, P RParen],
acceptsWith' " ( { [ : , :: ] } ) "
[P LParen, P LBrace, P LSquare,
P Colon, P Comma, P DblColon,
P RSquare, P RBrace, P RParen],
acceptsWith' "-> → => ⇒ ** × << ⊲ ∷"
[P Arrow, P Arrow, P DblArrow, P DblArrow,
P Times, P Times, P Triangle, P Triangle, P DblColon],
acceptsWith' "_" [P Wild]
],
"names & symbols" :- [
acceptsWith' "a" [Name],
acceptsWith' "abc" [Name],
acceptsWith' "_a" [Name],
acceptsWith' "a_" [Name],
acceptsWith' "a_b" [Name],
acceptsWith' "abc'" [Name],
acceptsWith' "a'b'c''" [Name],
acceptsWith' "abc123" [Name],
acceptsWith' "ab cd" [Name, Name],
acceptsWith' "ab{--}cd" [Name, Name],
acceptsWith' "'a" [Symbol],
acceptsWith' "'ab" [Symbol],
acceptsWith' "'_b" [Symbol],
rejects' "'"
]
]