quox/lib/Quox/Parser/Parser.idr

594 lines
17 KiB
Idris
Raw Permalink Normal View History

module Quox.Parser.Parser
import public Quox.Parser.Lexer
import public Quox.Parser.Syntax
import Data.Bool
import Data.Fin
import Data.Vect
import public Text.Parser
2023-03-31 13:27:38 -04:00
import Derive.Prelude
2023-03-31 13:27:38 -04:00
%language ElabReflection
%default total
public export
0 Grammar : Bool -> Type -> Type
Grammar = Core.Grammar () Token
%hide Core.Grammar
public export
data Error =
LexError Lexer.Error
| ParseError (List1 (ParsingError Token))
%hide Lexer.Error
%runElab derive "Error" [Show]
export
lexParseWith : {c : Bool} -> Grammar c a -> String -> Either Error a
lexParseWith grm input = do
toks <- mapFst LexError $ lex input
bimap ParseError fst $ parse (grm <* eof) toks
export
withLoc : {c : Bool} -> FileName -> (Grammar c (Loc -> a)) -> Grammar c a
withLoc fname act = bounds act <&> \res =>
2023-05-01 21:06:25 -04:00
if res.isIrrelevant then res.val noLoc
else res.val $ makeLoc fname res.bounds
export
defLoc : FileName -> (Loc -> a) -> Grammar False a
defLoc fname f = position <&> f . makeLoc fname
export
unused : FileName -> Grammar False PatVar
unused fname = defLoc fname Unused
||| reserved token, like punctuation or keywords etc
export
res : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
res str = terminal "expected \"\{str}\"" $ guard . (== Reserved str)
||| optional reserved token, e.g. trailing comma
export
optRes : (str : String) -> (0 _ : IsReserved str) => Grammar False ()
optRes str = ignore $ optional $ res str
||| reserved token, then commit
export
resC : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
resC str = do res str; commit
||| reserved token or fatal error
export
needRes : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
needRes str = resC str <|> fatalError "expected \"\{str}\""
private
terminalMatchN_ : String -> List (TTImp, TTImp) -> Elab (Grammar True a)
terminalMatchN_ what matches = do
func <- check $ lam (lambdaArg `{x}) $
iCase `(x) implicitFalse $
map (\(l, r) => patClause l `(Just ~(r))) matches ++
[patClause `(_) `(Nothing)]
pure $ terminal "expected \{what}" func
private %macro
terminalMatchN : String -> List (TTImp, TTImp) -> Elab (Grammar True a)
terminalMatchN = terminalMatchN_
private %macro
terminalMatch : String -> TTImp -> TTImp -> Elab (Grammar True a)
terminalMatch what l r = terminalMatchN_ what [(l, r)]
||| tag without leading `'`
export
bareTag : Grammar True TagVal
2023-05-25 12:34:13 -04:00
bareTag = terminalMatchN "bare tag"
[(`(Name t), `(toDotsP t)), (`(Str s), `(s))]
||| tag with leading quote
export
tag : Grammar True TagVal
tag = terminalMatch "tag" `(Tag t) `(t)
||| natural number
export
nat : Grammar True Nat
nat = terminalMatch "natural number" `(Nat n) `(n)
||| string literal
export
strLit : Grammar True String
strLit = terminalMatch "string literal" `(Str s) `(s)
2023-05-21 14:09:34 -04:00
||| single-token universe, like ★0 or Type1
export
2023-05-21 14:09:34 -04:00
universeTok : Grammar True Universe
universeTok = terminalMatch "universe" `(TYPE u) `(u)
export
super : Grammar True Nat
super = terminalMatch "superscript number or '^'" `(Sup n) `(n)
||| possibly-qualified name
export
qname : Grammar True PName
qname = terminalMatch "name" `(Name n) `(n)
||| unqualified name
export
baseName : Grammar True PBaseName
baseName = terminalMatch "unqualified name" `(Name (MakePName [<] b)) `(b)
||| dimension constant (0 or 1)
export
dimConst : Grammar True DimConst
dimConst = terminalMatchN "dimension constant"
[(`(Nat 0), `(Zero)), (`(Nat 1), `(One))]
||| quantity (0, 1, or ω)
export
qtyVal : Grammar True Qty
qtyVal = terminalMatchN "quantity"
[(`(Nat 0), `(Zero)), (`(Nat 1), `(One)), (`(Reserved "ω"), `(Any))]
2023-03-26 08:40:54 -04:00
2023-05-21 14:09:34 -04:00
||| optional superscript number
export
displacement : Grammar False (Maybe Universe)
displacement = optional super
||| quantity (0, 1, or ω)
export
qty : FileName -> Grammar True PQty
qty fname = withLoc fname [|PQ qtyVal|]
||| pattern var (unqualified name or _)
export
patVar : FileName -> Grammar True PatVar
patVar fname = withLoc fname $
[|PV baseName|] <|> Unused <$ res "_"
||| dimension (without `@` prefix)
export
dim : FileName -> Grammar True PDim
dim fname = withLoc fname $ [|K dimConst|] <|> [|V baseName|]
||| dimension argument (with @)
export
dimArg : FileName -> Grammar True PDim
dimArg fname = do resC "@"; mustWork $ dim fname
delim : (o, c : String) -> (0 _ : IsReserved o) => (0 _ : IsReserved c) =>
{k : Bool} -> Grammar k a -> Grammar True a
delim o c p = resC o *> p <* needRes c
-- this stuff is Like This (rather than just being delim + sepEndBy{1})
-- so that it checks for the close bracket before trying another list element,
-- giving (imo) a better error
parameters (o, c, s : String)
{auto 0 _ : IsReserved o} {auto 0 _ : IsReserved c}
{auto 0 _ : IsReserved s}
(p : Grammar True a)
private
dsBeforeDelim, dsAfterDelim : Grammar True (List a)
dsBeforeDelim = [] <$ resC c <|> resC s *> assert_total dsAfterDelim
dsAfterDelim = [] <$ resC c <|> [|p :: assert_total dsBeforeDelim|]
export
delimSep1 : Grammar True (List1 a)
delimSep1 = resC o *> [|p ::: dsBeforeDelim|]
export
delimSep : Grammar True (List a)
delimSep = resC o *> dsAfterDelim
||| enum type, e.g. `{a, b, c.d, "e f g"}`
export
enumType : Grammar True (List TagVal)
enumType = delimSep "{" "}" "," bareTag
||| e.g. `case` or `case 1.`
export
caseIntro : FileName -> Grammar True PQty
caseIntro fname =
withLoc fname (PQ Zero <$ res "case0")
<|> withLoc fname (PQ One <$ res "case1")
<|> withLoc fname (PQ Any <$ res "caseω")
<|> delim "case" "." (qty fname)
export
qtyPatVar : FileName -> Grammar True (PQty, PatVar)
qtyPatVar fname = [|(,) (qty fname) (needRes "." *> patVar fname)|]
export
ptag : FileName -> Grammar True PTagVal
ptag fname = withLoc fname $ [|PT tag|]
public export
data PCasePat =
PPair PatVar PatVar Loc
| PTag PTagVal Loc
| PZero Loc
| PSucc PatVar PQty PatVar Loc
| PBox PatVar Loc
%runElab derive "PCasePat" [Eq, Ord, Show]
export
Located PCasePat where
(PPair _ _ loc).loc = loc
(PTag _ loc).loc = loc
(PZero loc).loc = loc
(PSucc _ _ _ loc).loc = loc
(PBox _ loc).loc = loc
||| either `zero` or `0`
export
zeroPat : Grammar True ()
zeroPat = resC "zero" <|> terminal "expected '0'" (guard . (== Nat 0))
export
casePat : FileName -> Grammar True PCasePat
casePat fname = withLoc fname $
delim "(" ")" [|PPair (patVar fname) (needRes "," *> patVar fname)|]
<|> [|PTag (ptag fname)|]
<|> PZero <$ zeroPat
<|> do p <- resC "succ" *> patVar fname
ih <- resC "," *> qtyPatVar fname
<|> [|(,) (defLoc fname $ PQ Zero) (unused fname)|]
pure $ PSucc p (fst ih) (snd ih)
<|> delim "[" "]" [|PBox (patVar fname)|]
<|> fatalError "invalid pattern"
2023-05-01 21:06:25 -04:00
export
term : FileName -> Grammar True PTerm
-- defined after all the subterm parsers
2023-04-25 20:23:59 -04:00
||| box term `[t]` or type `[π.A]`
2023-05-01 21:06:25 -04:00
export
boxTerm : FileName -> Grammar True PTerm
boxTerm fname = withLoc fname $ do
2023-04-25 20:23:59 -04:00
res "["; commit
2023-05-01 21:06:25 -04:00
q <- optional $ qty fname <* res "."
t <- mustWork $ assert_total term fname <* needRes "]"
2023-04-25 20:23:59 -04:00
pure $ maybe (Box t) (\q => BOX q t) q
||| tuple term like `(a, b)`, or parenthesised single term.
||| allows terminating comma. more than two elements are nested on the right:
||| `(a, b, c, d) = (a, (b, (c, d)))`.
2023-05-01 21:06:25 -04:00
export
tupleTerm : FileName -> Grammar True PTerm
tupleTerm fname = withLoc fname $ do
2023-05-01 21:06:25 -04:00
terms <- delimSep1 "(" ")" "," $ assert_total term fname
pure $ \loc => foldr1 (\s, t => Pair s t loc) terms
2023-04-25 20:23:59 -04:00
2023-05-21 14:09:34 -04:00
export
universe1 : Grammar True Universe
2023-05-21 14:33:42 -04:00
universe1 = universeTok <|> res "" *> option 0 super
2023-05-21 14:09:34 -04:00
2023-04-25 20:23:59 -04:00
||| argument/atomic term: single-token terms, or those with delimiters e.g.
||| `[t]`
2023-05-01 21:06:25 -04:00
export
termArg : FileName -> Grammar True PTerm
termArg fname = withLoc fname $
[|TYPE universe1|]
<|> [|Enum enumType|]
<|> [|Tag tag|]
<|> const <$> boxTerm fname
<|> Nat <$ res ""
<|> Zero <$ res "zero"
<|> [|fromNat nat|]
2023-05-21 14:09:34 -04:00
<|> [|V qname displacement|]
<|> const <$> tupleTerm fname
2023-04-25 20:23:59 -04:00
export
properTypeLine : FileName -> Grammar True (PatVar, PTerm)
properTypeLine fname = do
resC "("
i <- patVar fname <* resC "" <|> unused fname
t <- assert_total term fname <* needRes ")"
pure (i, t)
export
typeLine : FileName -> Grammar True (PatVar, PTerm)
typeLine fname =
properTypeLine fname <|> [|(,) (unused fname) (termArg fname)|]
||| optionally, two dimension arguments. if absent default to `@0 @1`
private
optDirection : FileName -> Grammar False (PDim, PDim)
optDirection fname = withLoc fname $ do
dims <- optional [|(,) (dimArg fname) (dimArg fname)|]
pure $ \loc => fromMaybe (K Zero loc, K One loc) dims
2023-05-01 21:06:25 -04:00
export
coeTerm : FileName -> Grammar True PTerm
coeTerm fname = withLoc fname $ do
resC "coe"
mustWork $ do
line <- typeLine fname
(p, q) <- optDirection fname
val <- termArg fname
pure $ Coe line p q val
public export
CompBranch : Type
CompBranch = (DimConst, PatVar, PTerm)
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
compBranch : FileName -> Grammar True CompBranch
2023-05-01 21:06:25 -04:00
compBranch fname =
[|(,,) dimConst (patVar fname) (needRes "" *> assert_total term fname)|]
private
checkCompTermBody : (PatVar, PTerm) -> PDim -> PDim -> PTerm -> PDim ->
CompBranch -> CompBranch -> Bounds ->
Grammar False (Loc -> PTerm)
checkCompTermBody a p q s r (e0, s0) (e1, s1) bounds =
case (e0, e1) of
(Zero, One) => pure $ Comp a p q s r s0 s1
(One, Zero) => pure $ Comp a p q s r s1 s0
(_, _) =>
fatalLoc bounds "body of 'comp' needs one 0 case and one 1 case"
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
compTerm : FileName -> Grammar True PTerm
compTerm fname = withLoc fname $ do
2023-04-25 20:23:59 -04:00
resC "comp"
mustWork $ do
a <- typeLine fname
(p, q) <- optDirection fname
s <- termArg fname; r <- dimArg fname
bodyStart <- bounds $ needRes "{"
s0 <- compBranch fname; needRes ";"
s1 <- compBranch fname; optRes ";"
bodyEnd <- bounds $ needRes "}"
let body = bounds $ mergeBounds bodyStart bodyEnd
checkCompTermBody a p q s r s0 s1 body
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
splitUniverseTerm : FileName -> Grammar True PTerm
2023-05-21 14:09:34 -04:00
splitUniverseTerm fname =
2023-05-21 14:33:42 -04:00
withLoc fname $ resC "" *> [|TYPE $ option 0 $ nat <|> super|]
-- some of this looks redundant, but when parsing a non-atomic term
2023-05-21 14:09:34 -04:00
-- this branch will be taken first
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
eqTerm : FileName -> Grammar True PTerm
eqTerm fname = withLoc fname $
resC "Eq" *> mustWork [|Eq (typeLine fname) (termArg fname) (termArg fname)|]
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
succTerm : FileName -> Grammar True PTerm
succTerm fname = withLoc fname $
resC "succ" *> mustWork [|Succ (termArg fname)|]
2023-04-25 20:23:59 -04:00
||| a dimension argument with an `@` prefix, or
||| a term argument with no prefix
2023-05-01 21:06:25 -04:00
export
anyArg : FileName -> Grammar True (Either PDim PTerm)
anyArg fname = dimArg fname <||> termArg fname
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
normalAppTerm : FileName -> Grammar True PTerm
normalAppTerm fname = withLoc fname $ do
head <- termArg fname
args <- many $ anyArg fname
pure $ \loc => foldl (ap loc) head args
where ap : Loc -> PTerm -> Either PDim PTerm -> PTerm
ap loc f (Left p) = DApp f p loc
ap loc f (Right s) = App f s loc
2023-04-25 20:23:59 -04:00
||| application term `f x @y z`, or other terms that look like application
||| like `succ` or `coe`.
2023-05-01 21:06:25 -04:00
export
appTerm : FileName -> Grammar True PTerm
appTerm fname =
coeTerm fname
<|> compTerm fname
<|> splitUniverseTerm fname
<|> eqTerm fname
<|> succTerm fname
<|> normalAppTerm fname
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
infixEqTerm : FileName -> Grammar True PTerm
infixEqTerm fname = withLoc fname $ do
l <- appTerm fname; commit
2023-05-01 21:06:25 -04:00
rest <- optional $ res "" *>
[|(,) (assert_total term fname) (needRes ":" *> appTerm fname)|]
let u = Unused $ onlyStart l.loc
pure $ \loc => maybe l (\rest => Eq (u, snd rest) l (fst rest) loc) rest
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
annTerm : FileName -> Grammar True PTerm
annTerm fname = withLoc fname $ do
tm <- infixEqTerm fname; commit
2023-05-01 21:06:25 -04:00
ty <- optional $ res "" *> assert_total term fname
pure $ \loc => maybe tm (\ty => Ann tm ty loc) ty
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
lamTerm : FileName -> Grammar True PTerm
lamTerm fname = withLoc fname $ do
2023-04-25 20:23:59 -04:00
k <- DLam <$ res "δ" <|> Lam <$ res "λ"
mustWork $ do
2023-05-01 21:06:25 -04:00
xs <- some $ patVar fname; needRes ""
body <- assert_total term fname; commit
pure $ \loc => foldr (\x, s => k x s loc) body xs
2023-04-25 20:23:59 -04:00
-- [todo] fix the backtracking in e.g. (F x y z × B)
2023-05-01 21:06:25 -04:00
export
properBinders : FileName -> Grammar True (List1 PatVar, PTerm)
2023-05-01 21:06:25 -04:00
properBinders fname = assert_total $ do
-- putting assert_total directly on `term`, in this one function,
-- doesn't work. i cannot tell why
2023-04-25 20:23:59 -04:00
res "("
xs <- some $ patVar fname; resC ":"
t <- term fname; needRes ")"
2023-04-25 20:23:59 -04:00
pure (xs, t)
2023-05-01 21:06:25 -04:00
export
piTerm : FileName -> Grammar True PTerm
piTerm fname = withLoc fname $ do
2023-05-01 21:06:25 -04:00
q <- qty fname; resC "."
dom <- piBinder; needRes ""
cod <- assert_total term fname; commit
pure $ \loc => foldr (\x, t => Pi q x (snd dom) t loc) cod (fst dom)
2023-04-25 20:23:59 -04:00
where
piBinder : Grammar True (List1 PatVar, PTerm)
piBinder = properBinders fname
<|> [|(,) [|singleton $ unused fname|] (termArg fname)|]
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
sigmaTerm : FileName -> Grammar True PTerm
sigmaTerm fname =
(properBinders fname >>= continueDep)
<|> (annTerm fname >>= continueNondep)
2023-04-25 20:23:59 -04:00
where
continueDep : (List1 PatVar, PTerm) -> Grammar True PTerm
continueDep (names, fst) = withLoc fname $ do
snd <- needRes "×" *> sigmaTerm fname
pure $ \loc => foldr (\x, snd => Sig x fst snd loc) snd names
cross : PTerm -> PTerm -> PTerm
cross l r = let loc = extend' l.loc r.loc.bounds in
Sig (Unused $ onlyStart l.loc) l r loc
2023-04-25 20:23:59 -04:00
continueNondep : PTerm -> Grammar False PTerm
continueNondep fst = do
rest <- optional $ resC "×" *> sepBy1 (res "×") (annTerm fname)
pure $ foldr1 cross $ fst ::: maybe [] toList rest
2023-04-25 20:23:59 -04:00
public export
PCaseArm : Type
PCaseArm = (PCasePat, PTerm)
2023-05-01 21:06:25 -04:00
export
caseArm : FileName -> Grammar True PCaseArm
2023-05-01 21:06:25 -04:00
caseArm fname =
[|(,) (casePat fname) (needRes "" *> assert_total term fname)|]
export
checkCaseArms : Loc -> List PCaseArm -> Grammar False PCaseBody
checkCaseArms loc [] = pure $ CaseEnum [] loc
checkCaseArms loc ((PPair x y _, rhs) :: rest) =
if null rest then pure $ CasePair (x, y) rhs loc
else fatalError "unexpected pattern after pair"
checkCaseArms loc ((PTag tag _, rhs1) :: rest) = do
let rest = for rest $ \case
(PTag tag _, rhs) => Just (tag, rhs)
_ => Nothing
maybe (fatalError "expected all patterns to be tags")
(\rest => pure $ CaseEnum ((tag, rhs1) :: rest) loc) rest
checkCaseArms loc ((PZero _, rhs1) :: rest) = do
let [(PSucc p q ih _, rhs2)] = rest
| _ => fatalError "expected succ pattern after zero"
pure $ CaseNat rhs1 (p, q, ih, rhs2) loc
checkCaseArms loc ((PSucc p q ih _, rhs1) :: rest) = do
let [(PZero _, rhs2)] = rest
| _ => fatalError "expected zero pattern after succ"
pure $ CaseNat rhs2 (p, q, ih, rhs1) loc
checkCaseArms loc ((PBox x _, rhs) :: rest) =
if null rest then pure $ CaseBox x rhs loc
else fatalError "unexpected pattern after box"
2023-05-01 21:06:25 -04:00
export
caseBody : FileName -> Grammar True PCaseBody
caseBody fname = do
body <- bounds $ delimSep "{" "}" ";" $ caseArm fname
let loc = makeLoc fname body.bounds
checkCaseArms loc body.val
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
caseReturn : FileName -> Grammar True (PatVar, PTerm)
caseReturn fname = do
x <- patVar fname <* resC "" <|> unused fname
2023-05-01 21:06:25 -04:00
ret <- assert_total term fname
pure (x, ret)
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
caseTerm : FileName -> Grammar True PTerm
caseTerm fname = withLoc fname $ do
2023-05-01 21:06:25 -04:00
qty <- caseIntro fname; commit
head <- mustWork $ assert_total term fname; needRes "return"
ret <- mustWork $ caseReturn fname; needRes "of"
body <- mustWork $ caseBody fname
2023-04-25 20:23:59 -04:00
pure $ Case qty head ret body
2023-05-01 21:06:25 -04:00
-- export
-- term : FileName -> Grammar True PTerm
term fname = lamTerm fname
<|> caseTerm fname
<|> piTerm fname
<|> sigmaTerm fname
2023-04-17 15:41:00 -04:00
2023-05-01 21:06:25 -04:00
export
decl : FileName -> Grammar True PDecl
||| `def` alone means `defω`
export
defIntro : FileName -> Grammar True PQty
defIntro fname =
withLoc fname (PQ Zero <$ resC "def0")
<|> withLoc fname (PQ Any <$ resC "defω")
<|> do pos <- bounds $ resC "def"
let any = PQ Any $ makeLoc fname pos.bounds
option any $ qty fname <* needRes "."
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
definition : FileName -> Grammar True PDefinition
definition fname = withLoc fname $ do
qty <- defIntro fname
2023-04-25 20:23:59 -04:00
name <- baseName
type <- optional $ resC ":" *> mustWork (term fname)
term <- needRes "=" *> mustWork (term fname)
2023-04-25 20:23:59 -04:00
optRes ";"
pure $ MkPDef qty name type term
2023-04-25 20:23:59 -04:00
2023-05-01 21:06:25 -04:00
export
namespace_ : FileName -> Grammar True PNamespace
namespace_ fname = withLoc fname $ do
2023-04-25 20:23:59 -04:00
ns <- resC "namespace" *> qname; needRes "{"
decls <- nsInner; optRes ";"
pure $ MkPNamespace (ns.mods :< ns.base) decls
where
nsInner : Grammar True (List PDecl)
nsInner = [] <$ resC "}"
2023-05-01 21:06:25 -04:00
<|> [|(assert_total decl fname <* commit) :: assert_total nsInner|]
2023-04-25 20:23:59 -04:00
decl fname = [|PDef $ definition fname|] <|> [|PNs $ namespace_ fname|]
2023-05-01 21:06:25 -04:00
export
load : FileName -> Grammar True PTopLevel
load fname = withLoc fname $
resC "load" *> mustWork [|PLoad strLit|] <* optRes ";"
2023-05-01 21:06:25 -04:00
export
topLevel : FileName -> Grammar True PTopLevel
topLevel fname = load fname <|> [|PD $ decl fname|]
2023-05-01 21:06:25 -04:00
export
input : FileName -> Grammar False (List PTopLevel)
input fname = [] <$ eof
2023-05-01 21:06:25 -04:00
<|> [|(topLevel fname <* commit) :: assert_total input fname|]
2023-05-01 21:06:25 -04:00
export
lexParseTerm : FileName -> String -> Either Error PTerm
lexParseTerm = lexParseWith . term
2023-05-01 21:06:25 -04:00
export
lexParseInput : FileName -> String -> Either Error (List PTopLevel)
lexParseInput = lexParseWith . input