quox/lib/Quox/Parser/Parser.idr
2023-09-18 21:53:38 +02:00

635 lines
19 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Quox.Parser.Parser
import public Quox.Parser.Lexer
import public Quox.Parser.Syntax
import Data.Bool
import Data.Fin
import Data.Vect
import public Text.Parser
import Derive.Prelude
%language ElabReflection
%default total
public export
0 Grammar : Bool -> Type -> Type
Grammar = Core.Grammar () Token
%hide Core.Grammar
public export
data Error =
LexError Lexer.Error
| ParseError (List1 (ParsingError Token))
%hide Lexer.Error
%runElab derive "Error" [Show]
export
lexParseWith : {c : Bool} -> Grammar c a -> String -> Either Error a
lexParseWith grm input = do
toks <- mapFst LexError $ lex input
bimap ParseError fst $ parse (grm <* eof) toks
export
withLoc : {c : Bool} -> FileName -> (Grammar c (Loc -> a)) -> Grammar c a
withLoc fname act = bounds act <&> \res =>
if res.isIrrelevant then res.val noLoc
else res.val $ makeLoc fname res.bounds
export
defLoc : FileName -> (Loc -> a) -> Grammar False a
defLoc fname f = position <&> f . makeLoc fname
export
unused : FileName -> Grammar False PatVar
unused fname = defLoc fname Unused
||| reserved token, like punctuation or keywords etc
export
res : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
res str = terminal "expected \"\{str}\"" $ guard . (== Reserved str)
||| optional reserved token, e.g. trailing comma
export
optRes : (str : String) -> (0 _ : IsReserved str) => Grammar False ()
optRes str = ignore $ optional $ res str
||| reserved token, then commit
export
resC : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
resC str = do res str; commit
||| reserved token or fatal error
export
needRes : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
needRes str = resC str <|> fatalError "expected \"\{str}\""
private
terminalMatchN_ : String -> List (TTImp, TTImp) -> Elab (Grammar True a)
terminalMatchN_ what matches = do
func <- check $ lam (lambdaArg `{x}) $
iCase `(x) implicitFalse $
map (\(l, r) => patClause l `(Just ~(r))) matches ++
[patClause `(_) `(Nothing)]
pure $ terminal "expected \{what}" func
private %macro
terminalMatchN : String -> List (TTImp, TTImp) -> Elab (Grammar True a)
terminalMatchN = terminalMatchN_
private %macro
terminalMatch : String -> TTImp -> TTImp -> Elab (Grammar True a)
terminalMatch what l r = terminalMatchN_ what [(l, r)]
||| tag without leading `'`
export
bareTag : Grammar True TagVal
bareTag = terminalMatchN "bare tag"
[(`(Name t), `(toDotsP t)), (`(Str s), `(s))]
||| tag with leading quote
export
tag : Grammar True TagVal
tag = terminalMatch "tag" `(Tag t) `(t)
||| natural number
export
nat : Grammar True Nat
nat = terminalMatch "natural number" `(Nat n) `(n)
||| string literal
export
strLit : Grammar True String
strLit = terminalMatch "string literal" `(Str s) `(s)
||| single-token universe, like ★0 or Type1
export
universeTok : Grammar True Universe
universeTok = terminalMatch "universe" `(TYPE u) `(u)
export
super : Grammar True Nat
super = terminalMatch "superscript number or '^'" `(Sup n) `(n)
||| possibly-qualified name
export
qname : Grammar True PName
qname = terminalMatch "name" `(Name n) `(n)
||| unqualified name
export
baseName : Grammar True PBaseName
baseName = terminalMatch "unqualified name" `(Name (MakePName [<] b)) `(b)
||| dimension constant (0 or 1)
export
dimConst : Grammar True DimConst
dimConst = terminalMatchN "dimension constant"
[(`(Nat 0), `(Zero)), (`(Nat 1), `(One))]
||| quantity (0, 1, or ω)
export
qtyVal : Grammar True Qty
qtyVal = terminalMatchN "quantity"
[(`(Nat 0), `(Zero)), (`(Nat 1), `(One)), (`(Reserved "ω"), `(Any))]
||| optional superscript number
export
displacement : Grammar False (Maybe Universe)
displacement = optional super
||| quantity (0, 1, or ω)
export
qty : FileName -> Grammar True PQty
qty fname = withLoc fname [|PQ qtyVal|]
||| pattern var (unqualified name or _)
export
patVar : FileName -> Grammar True PatVar
patVar fname = withLoc fname $
[|PV baseName|] <|> Unused <$ res "_"
||| dimension (without `@` prefix)
export
dim : FileName -> Grammar True PDim
dim fname = withLoc fname $ [|K dimConst|] <|> [|V baseName|]
||| dimension argument (with @)
export
dimArg : FileName -> Grammar True PDim
dimArg fname = do resC "@"; mustWork $ dim fname
delim : (o, c : String) -> (0 _ : IsReserved o) => (0 _ : IsReserved c) =>
{k : Bool} -> Grammar k a -> Grammar True a
delim o c p = resC o *> p <* needRes c
-- this stuff is Like This (rather than just being delim + sepEndBy{1})
-- so that it checks for the close bracket before trying another list element,
-- giving (imo) a better error
parameters (o, c, s : String)
{auto 0 _ : IsReserved o} {auto 0 _ : IsReserved c}
{auto 0 _ : IsReserved s}
(p : Grammar True a)
private
dsBeforeDelim, dsAfterDelim : Grammar True (List a)
dsBeforeDelim = [] <$ resC c <|> resC s *> assert_total dsAfterDelim
dsAfterDelim = [] <$ resC c <|> [|p :: assert_total dsBeforeDelim|]
export
delimSep1 : Grammar True (List1 a)
delimSep1 = resC o *> [|p ::: dsBeforeDelim|]
export
delimSep : Grammar True (List a)
delimSep = resC o *> dsAfterDelim
||| enum type, e.g. `{a, b, c.d, "e f g"}`
export
enumType : Grammar True (List TagVal)
enumType = delimSep "{" "}" "," bareTag
||| e.g. `case1` or `case 1.`
export
caseIntro : FileName -> Grammar True PQty
caseIntro fname =
withLoc fname (PQ Zero <$ res "case0")
<|> withLoc fname (PQ One <$ res "case1")
<|> withLoc fname (PQ Any <$ res "caseω")
<|> do resC "case"
qty fname <* needRes "." <|> defLoc fname (PQ One)
export
qtyPatVar : FileName -> Grammar True (PQty, PatVar)
qtyPatVar fname =
[|(,) (qty fname) (needRes "." *> patVar fname)|]
<|> [|(,) (defLoc fname $ PQ One) (patVar fname)|]
export
ptag : FileName -> Grammar True PTagVal
ptag fname = withLoc fname $ [|PT tag|]
public export
data PCasePat =
PPair PatVar PatVar Loc
| PTag PTagVal Loc
| PZero Loc
| PSucc PatVar PQty PatVar Loc
| PBox PatVar Loc
%runElab derive "PCasePat" [Eq, Ord, Show]
export
Located PCasePat where
(PPair _ _ loc).loc = loc
(PTag _ loc).loc = loc
(PZero loc).loc = loc
(PSucc _ _ _ loc).loc = loc
(PBox _ loc).loc = loc
||| either `zero` or `0`
export
zeroPat : Grammar True ()
zeroPat = resC "zero" <|> terminal "expected '0'" (guard . (== Nat 0))
export
casePat : FileName -> Grammar True PCasePat
casePat fname = withLoc fname $
delim "(" ")" [|PPair (patVar fname) (needRes "," *> patVar fname)|]
<|> [|PTag (ptag fname)|]
<|> PZero <$ zeroPat
<|> do p <- resC "succ" *> patVar fname
ih <- resC "," *> qtyPatVar fname
<|> [|(,) (defLoc fname $ PQ Zero) (unused fname)|]
pure $ PSucc p (fst ih) (snd ih)
<|> delim "[" "]" [|PBox (patVar fname)|]
<|> fatalError "invalid pattern"
export
term : FileName -> Grammar True PTerm
-- defined after all the subterm parsers
||| box term `[t]` or type `[π.A]`
export
boxTerm : FileName -> Grammar True PTerm
boxTerm fname = withLoc fname $ do
res "["; commit
q <- optional $ qty fname <* res "."
t <- mustWork $ assert_total term fname <* needRes "]"
pure $ maybe (Box t) (\q => BOX q t) q
||| tuple term like `(a, b)`, or parenthesised single term.
||| allows terminating comma. more than two elements are nested on the right:
||| `(a, b, c, d) = (a, (b, (c, d)))`.
export
tupleTerm : FileName -> Grammar True PTerm
tupleTerm fname = withLoc fname $ do
terms <- delimSep1 "(" ")" "," $ assert_total term fname
pure $ \loc => foldr1 (\s, t => Pair s t loc) terms
export
universe1 : Grammar True Universe
universe1 = universeTok <|> res "" *> option 0 super
||| argument/atomic term: single-token terms, or those with delimiters e.g.
||| `[t]`
export
termArg : FileName -> Grammar True PTerm
termArg fname = withLoc fname $
[|TYPE universe1|]
<|> [|Enum enumType|]
<|> [|Tag tag|]
<|> const <$> boxTerm fname
<|> Nat <$ res ""
<|> Zero <$ res "zero"
<|> [|fromNat nat|]
<|> [|V qname displacement|]
<|> const <$> tupleTerm fname
export
properTypeLine : FileName -> Grammar True (PatVar, PTerm)
properTypeLine fname = do
resC "("
i <- patVar fname <* resC "" <|> unused fname
t <- assert_total term fname <* needRes ")"
pure (i, t)
export
typeLine : FileName -> Grammar True (PatVar, PTerm)
typeLine fname =
properTypeLine fname <|> [|(,) (unused fname) (termArg fname)|]
||| optionally, two dimension arguments. if absent default to `@0 @1`
private
optDirection : FileName -> Grammar False (PDim, PDim)
optDirection fname = withLoc fname $ do
dims <- optional [|(,) (dimArg fname) (dimArg fname)|]
pure $ \loc => fromMaybe (K Zero loc, K One loc) dims
export
coeTerm : FileName -> Grammar True PTerm
coeTerm fname = withLoc fname $ do
resC "coe"
mustWork $ do
line <- typeLine fname
(p, q) <- optDirection fname
val <- termArg fname
pure $ Coe line p q val
public export
CompBranch : Type
CompBranch = (DimConst, PatVar, PTerm)
export
compBranch : FileName -> Grammar True CompBranch
compBranch fname =
[|(,,) dimConst (patVar fname) (needRes "" *> assert_total term fname)|]
private
checkCompTermBody : (PatVar, PTerm) -> PDim -> PDim -> PTerm -> PDim ->
CompBranch -> CompBranch -> Bounds ->
Grammar False (Loc -> PTerm)
checkCompTermBody a p q s r (e0, s0) (e1, s1) bounds =
case (e0, e1) of
(Zero, One) => pure $ Comp a p q s r s0 s1
(One, Zero) => pure $ Comp a p q s r s1 s0
(_, _) =>
fatalLoc bounds "body of 'comp' needs one 0 case and one 1 case"
export
compTerm : FileName -> Grammar True PTerm
compTerm fname = withLoc fname $ do
resC "comp"
mustWork $ do
a <- typeLine fname
(p, q) <- optDirection fname
s <- termArg fname; r <- dimArg fname
bodyStart <- bounds $ needRes "{"
s0 <- compBranch fname; needRes ";"
s1 <- compBranch fname; optRes ";"
bodyEnd <- bounds $ needRes "}"
let body = bounds $ mergeBounds bodyStart bodyEnd
checkCompTermBody a p q s r s0 s1 body
export
splitUniverseTerm : FileName -> Grammar True PTerm
splitUniverseTerm fname =
withLoc fname $ resC "" *> [|TYPE $ option 0 $ nat <|> super|]
-- some of this looks redundant, but when parsing a non-atomic term
-- this branch will be taken first
export
eqTerm : FileName -> Grammar True PTerm
eqTerm fname = withLoc fname $
resC "Eq" *> mustWork [|Eq (typeLine fname) (termArg fname) (termArg fname)|]
export
resAppTerm : FileName -> (word : String) -> (0 _ : IsReserved word) =>
(PTerm -> Loc -> PTerm) -> Grammar True PTerm
resAppTerm fname word f = withLoc fname $
resC word *> mustWork [|f (termArg fname)|]
export
succTerm : FileName -> Grammar True PTerm
succTerm fname = resAppTerm fname "succ" Succ
export
fstTerm : FileName -> Grammar True PTerm
fstTerm fname = resAppTerm fname "fst" Fst
export
sndTerm : FileName -> Grammar True PTerm
sndTerm fname = resAppTerm fname "snd" Snd
||| a dimension argument with an `@` prefix, or
||| a term argument with no prefix
export
anyArg : FileName -> Grammar True (Either PDim PTerm)
anyArg fname = dimArg fname <||> termArg fname
export
normalAppTerm : FileName -> Grammar True PTerm
normalAppTerm fname = withLoc fname $ do
head <- termArg fname
args <- many $ anyArg fname
pure $ \loc => foldl (ap loc) head args
where ap : Loc -> PTerm -> Either PDim PTerm -> PTerm
ap loc f (Left p) = DApp f p loc
ap loc f (Right s) = App f s loc
||| application term `f x @y z`, or other terms that look like application
||| like `succ` or `coe`.
export
appTerm : FileName -> Grammar True PTerm
appTerm fname =
coeTerm fname
<|> compTerm fname
<|> splitUniverseTerm fname
<|> eqTerm fname
<|> succTerm fname
<|> fstTerm fname
<|> sndTerm fname
<|> normalAppTerm fname
export
infixEqTerm : FileName -> Grammar True PTerm
infixEqTerm fname = withLoc fname $ do
l <- appTerm fname; commit
rest <- optional $ res "" *>
[|(,) (assert_total term fname) (needRes ":" *> appTerm fname)|]
let u = Unused $ onlyStart l.loc
pure $ \loc => maybe l (\rest => Eq (u, snd rest) l (fst rest) loc) rest
export
annTerm : FileName -> Grammar True PTerm
annTerm fname = withLoc fname $ do
tm <- infixEqTerm fname; commit
ty <- optional $ res "" *> assert_total term fname
pure $ \loc => maybe tm (\ty => Ann tm ty loc) ty
export
lamTerm : FileName -> Grammar True PTerm
lamTerm fname = withLoc fname $ do
k <- DLam <$ res "δ" <|> Lam <$ res "λ"
mustWork $ do
xs <- some $ patVar fname; needRes ""
body <- assert_total term fname; commit
pure $ \loc => foldr (\x, s => k x s loc) body xs
-- [todo] fix the backtracking in e.g. (F x y z × B)
export
properBinders : FileName -> Grammar True (List1 PatVar, PTerm)
properBinders fname = assert_total $ do
-- putting assert_total directly on `term`, in this one function,
-- doesn't work. i cannot tell why
res "("
xs <- some $ patVar fname; resC ":"
t <- term fname; needRes ")"
pure (xs, t)
export
sigmaTerm : FileName -> Grammar True PTerm
sigmaTerm fname =
(properBinders fname >>= continueDep)
<|> (annTerm fname >>= continueNondep)
where
continueDep : (List1 PatVar, PTerm) -> Grammar True PTerm
continueDep (names, fst) = withLoc fname $ do
snd <- needRes "×" *> sigmaTerm fname
pure $ \loc => foldr (\x, snd => Sig x fst snd loc) snd names
cross : PTerm -> PTerm -> PTerm
cross l r = let loc = extend' l.loc r.loc.bounds in
Sig (Unused $ onlyStart l.loc) l r loc
continueNondep : PTerm -> Grammar False PTerm
continueNondep fst = do
rest <- optional $ resC "×" *> sepBy1 (res "×") (annTerm fname)
pure $ foldr1 cross $ fst ::: maybe [] toList rest
export
piTerm : FileName -> Grammar True PTerm
piTerm fname = withLoc fname $ do
q <- [|GivenQ $ qty fname <* resC "."|] <|> defLoc fname DefaultQ
dom <- [|Dep $ properBinders fname|] <|> [|Nondep $ ndDom q fname|]
cod <- optional $ do resC ""; assert_total term fname <* commit
when (needCod q dom && isNothing cod) $ fail "missing function type result"
pure $ maybe (const $ toTerm dom) (makePi q dom) cod
where
data PiQty = GivenQ PQty | DefaultQ Loc
data PiDom = Dep (List1 PatVar, PTerm) | Nondep PTerm
ndDom : PiQty -> FileName -> Grammar True PTerm
ndDom (GivenQ _) = termArg -- 「1.(List A)」, not 「1.List A」
ndDom (DefaultQ _) = sigmaTerm
needCod : PiQty -> PiDom -> Bool
needCod (DefaultQ _) (Nondep _) = False
needCod _ _ = True
toTerm : PiDom -> PTerm
toTerm (Dep (_, s)) = s
toTerm (Nondep s) = s
toQty : PiQty -> PQty
toQty (GivenQ qty) = qty
toQty (DefaultQ loc) = PQ One loc
toDoms : PQty -> PiDom -> List1 (PQty, PatVar, PTerm)
toDoms qty (Dep (xs, s)) = [(qty, x, s) | x <- xs]
toDoms qty (Nondep s) = singleton (qty, Unused s.loc, s)
makePi : PiQty -> PiDom -> PTerm -> Loc -> PTerm
makePi q doms cod loc =
foldr (\(q, x, s), t => Pi q x s t loc) cod $ toDoms (toQty q) doms
public export
PCaseArm : Type
PCaseArm = (PCasePat, PTerm)
export
caseArm : FileName -> Grammar True PCaseArm
caseArm fname =
[|(,) (casePat fname) (needRes "" *> assert_total term fname)|]
export
checkCaseArms : Loc -> List PCaseArm -> Grammar False PCaseBody
checkCaseArms loc [] = pure $ CaseEnum [] loc
checkCaseArms loc ((PPair x y _, rhs) :: rest) =
if null rest then pure $ CasePair (x, y) rhs loc
else fatalError "unexpected pattern after pair"
checkCaseArms loc ((PTag tag _, rhs1) :: rest) = do
let rest = for rest $ \case
(PTag tag _, rhs) => Just (tag, rhs)
_ => Nothing
maybe (fatalError "expected all patterns to be tags")
(\rest => pure $ CaseEnum ((tag, rhs1) :: rest) loc) rest
checkCaseArms loc ((PZero _, rhs1) :: rest) = do
let [(PSucc p q ih _, rhs2)] = rest
| _ => fatalError "expected succ pattern after zero"
pure $ CaseNat rhs1 (p, q, ih, rhs2) loc
checkCaseArms loc ((PSucc p q ih _, rhs1) :: rest) = do
let [(PZero _, rhs2)] = rest
| _ => fatalError "expected zero pattern after succ"
pure $ CaseNat rhs2 (p, q, ih, rhs1) loc
checkCaseArms loc ((PBox x _, rhs) :: rest) =
if null rest then pure $ CaseBox x rhs loc
else fatalError "unexpected pattern after box"
export
caseBody : FileName -> Grammar True PCaseBody
caseBody fname = do
body <- bounds $ delimSep "{" "}" ";" $ caseArm fname
let loc = makeLoc fname body.bounds
checkCaseArms loc body.val
export
caseReturn : FileName -> Grammar True (PatVar, PTerm)
caseReturn fname = do
x <- patVar fname <* resC "" <|> unused fname
ret <- assert_total term fname
pure (x, ret)
export
caseTerm : FileName -> Grammar True PTerm
caseTerm fname = withLoc fname $ do
qty <- caseIntro fname; commit
head <- mustWork $ assert_total term fname; needRes "return"
ret <- mustWork $ caseReturn fname; needRes "of"
body <- mustWork $ caseBody fname
pure $ Case qty head ret body
-- export
-- term : FileName -> Grammar True PTerm
term fname = lamTerm fname
<|> caseTerm fname
<|> piTerm fname
<|> sigmaTerm fname
export
decl : FileName -> Grammar True PDecl
||| `def` alone means `defω`
export
defIntro : FileName -> Grammar True PQty
defIntro fname =
withLoc fname (PQ Zero <$ resC "def0")
<|> withLoc fname (PQ Any <$ resC "defω")
<|> do pos <- bounds $ resC "def"
let any = PQ Any $ makeLoc fname pos.bounds
option any $ qty fname <* needRes "."
export
definition : FileName -> Grammar True PDefinition
definition fname = withLoc fname $ do
qty <- defIntro fname
name <- baseName
type <- optional $ resC ":" *> mustWork (term fname)
term <- needRes "=" *> mustWork (term fname)
optRes ";"
pure $ MkPDef qty name type term
export
namespace_ : FileName -> Grammar True PNamespace
namespace_ fname = withLoc fname $ do
ns <- resC "namespace" *> qname; needRes "{"
decls <- nsInner; optRes ";"
pure $ MkPNamespace (ns.mods :< ns.base) decls
where
nsInner : Grammar True (List PDecl)
nsInner = [] <$ resC "}"
<|> [|(assert_total decl fname <* commit) :: assert_total nsInner|]
decl fname = [|PDef $ definition fname|] <|> [|PNs $ namespace_ fname|]
export
load : FileName -> Grammar True PTopLevel
load fname = withLoc fname $
resC "load" *> mustWork [|PLoad strLit|] <* optRes ";"
export
topLevel : FileName -> Grammar True PTopLevel
topLevel fname = load fname <|> [|PD $ decl fname|]
export
input : FileName -> Grammar False (List PTopLevel)
input fname = [] <$ eof
<|> [|(topLevel fname <* commit) :: assert_total input fname|]
export
lexParseTerm : FileName -> String -> Either Error PTerm
lexParseTerm = lexParseWith . term
export
lexParseInput : FileName -> String -> Either Error (List PTopLevel)
lexParseInput = lexParseWith . input