This commit is contained in:
rhiannon morris 2023-03-04 21:02:51 +01:00
parent 95a6644a6c
commit edeee68cb7
11 changed files with 788 additions and 159 deletions

View file

@ -4,8 +4,9 @@ import Quox.CharExtra
import Quox.Name
import Data.String.Extra
import Data.SortedMap
import Text.Lexer
import Text.Lexer.Tokenizer
import public Data.List.Elem
import public Text.Lexer
import public Text.Lexer.Tokenizer
import Derive.Prelude
%hide TT.Name
@ -25,7 +26,7 @@ data Token = R String | I Name | N Nat | S String | T String | TYPE Nat
%runElab derive "Token" [Eq, Ord, Show]
-- token or whitespace
private
public export
0 TokenW : Type
TokenW = Maybe Token
@ -41,70 +42,51 @@ record Error where
%runElab derive "Error" [Eq, Ord, Show]
private
skip : Lexer -> Tokenizer TokenW
skip t = match t $ const Nothing
export
syntaxChars : List Char
syntaxChars = ['(', ')', '[', ']', '{', '}', '`', '"', '\'', ',', '.']
private
match : Lexer -> (String -> Token) -> Tokenizer TokenW
match t f = Tokenizer.match t (Just . f)
%hide Tokenizer.match
export
isSymStart, isSymCont : Char -> Bool
export %inline
syntaxChars : List Char
syntaxChars = ['(', ')', '[', ']', '{', '}', '"', '\'', ',', '.', ';']
private
stra, isSymCont : Char -> Bool
isSymStart c = not (c `elem` syntaxChars) && isSymChar c
isSymCont c = c == '\'' || isSymStart c
export
idStart, idCont, idEnd, idContEnd, symStart, symCont : Lexer
private
idStart, idCont, idEnd, idContEnd : Lexer
idStart = pred isIdStart
idCont = pred isIdCont
idEnd = pred $ \c => c `elem` unpack "?!#"
idContEnd = idCont <|> idEnd
private
symStart, symCont : Lexer
symStart = pred isSymStart
symCont = pred isSymCont
private %inline
resVal : String -> a -> Maybe Token
resVal str = const $ Just $ R str
export
resWordL, resSymL, syntaxL : String -> Lexer
resWordL str = exact str <+> reject idContEnd
resSymL str = exact str <+> reject symCont
syntaxL str = exact str
export
resWord, resSym, syntax : String -> Tokenizer TokenW
resWord str = match (resWordL str) (resVal str)
resSym str = match (resSymL str) (resVal str)
syntax str = match (syntaxL str) (resVal str)
-- return value contains unicode version
export
uresWord, uresSym, usyntax : (unicode, ascii : String) -> Tokenizer TokenW
uresWord u a = match (resWordL u <|> resWordL a) (resVal u)
uresSym u a = match (resSymL u <|> resSymL a) (resVal u)
usyntax u a = match (exact u <|> exact a) (resVal u)
export
alphaName, symName, baseNameL : Lexer
alphaName = idStart <+> many idCont <+> many idEnd
symName = symStart <+> many symCont
baseNameL = symName <|> alphaName
export
baseName : Tokenizer BaseName
baseName = match baseNameL UN
private
baseNameL : Lexer
baseNameL = idStart <+> many idCont <+> many idEnd
<|> symStart <+> many symCont
private
toName : String -> Name
toName = fromList . split (== '.')
nameL : Lexer
nameL = baseNameL <+> many (is '.' <+> baseNameL)
export
private
name : Tokenizer TokenW
name = match (baseNameL <+> many (is '.' <+> baseNameL))
(Just . I . toName . normalizeNfc)
name = match nameL $ I . fromList . split (== '.') . normalizeNfc
-- [todo] escapes other than \" and (accidentally) \\
||| [todo] escapes other than `\"` and (accidentally) `\\`
export
fromStringLit : String -> String
fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
@ -114,18 +96,18 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
go ('\\' :: c :: cs) = c :: go cs
go (c :: cs) = c :: go cs
export
private
string : Tokenizer TokenW
string = match stringLit (Just . S . fromStringLit)
string = match stringLit (S . fromStringLit)
export
private
nat : Tokenizer TokenW
nat = match (some (range '0' '9')) (Just . N . cast)
nat = match (some (range '0' '9')) (N . cast)
export
private
tag : Tokenizer TokenW
tag = match (is '`' <+> baseNameL) (Just . T . drop 1)
<|> match (is '`' <+> stringLit) (Just . T . fromStringLit . drop 1)
tag = match (is '\'' <+> nameL) (T . drop 1)
<|> match (is '\'' <+> stringLit) (T . fromStringLit . drop 1)
@ -140,38 +122,115 @@ subToNat : String -> Nat
subToNat = cast . pack . map fromSub . unpack
export
private
universe : Tokenizer TokenW
universe = universeWith "" <|> universeWith "Type" where
universeWith : String -> Tokenizer TokenW
universeWith pfx =
let len = length pfx in
match (exact pfx <+> some (range '0' '9'))
(Just . TYPE . cast . drop len) <|>
(TYPE . cast . drop len) <|>
match (exact pfx <+> some (range '' ''))
(Just . TYPE . subToNat . drop len)
(TYPE . subToNat . drop len)
export
private %inline
choice : (xs : List (Tokenizer a)) -> (0 _ : NonEmpty xs) => Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
namespace Res
||| description of a reserved symbol
||| @ W a reserved word (must not be followed by letters, digits, etc)
||| @ S a reserved symbol (must not be followed by symbolic chars)
||| @ X a character that doesn't show up in names (brackets, etc)
public export
data Res1 = W String | S String | X Char
%runElab derive "Res1" [Eq, Ord, Show]
||| description of a token that might have unicode & ascii-only aliases
public export
data Res = Only Res1 | Or Res1 Res1
%runElab derive "Res" [Eq, Ord, Show]
public export
S1, W1 : String -> Res
S1 = Only . S
W1 = Only . W
public export
X1 : Char -> Res
X1 = Only . X
public export
resString1 : Res1 -> String
resString1 (X x) = singleton x
resString1 (W w) = w
resString1 (S s) = s
||| return the representative string for a token description. if there are
||| two, then it's the first one, which should be the full-unicode one
public export
resString : Res -> String
resString (Only r) = resString1 r
resString (r `Or` _) = resString1 r
private
resTokenizer1 : Res1 -> String -> Tokenizer TokenW
resTokenizer1 r str =
let res : String -> Token := const $ R str in
case r of W w => match (exact w <+> reject idContEnd) res
S s => match (exact s <+> reject symCont) res
X x => match (is x) res
||| match a reserved token
export
resTokenizer : Res -> Tokenizer TokenW
resTokenizer (Only r) = resTokenizer1 r (resString1 r)
resTokenizer (r `Or` s) =
resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
||| reserved words & symbols.
||| the tokens recognised by ``a `Or` b`` will be `R a`. e.g. `=>` in the input
||| (if not part of a longer name) will be returned as `R "⇒"`.
public export
reserved : List Res
reserved =
[X1 '(', X1 ')', X1 '[', X1 ']', X1 '{', X1 '}', X1 ',', X1 ';',
S1 "@",
S1 ":",
S "" `Or` S "=>",
S "" `Or` S "->",
S "×" `Or` S "**",
S "" `Or` S "==",
S "" `Or` S "::",
S "·" `Or` X '.',
W1 "case",
W1 "case1",
W "caseω" `Or` W "case#",
W1 "return",
W1 "of",
W1 "_",
W1 "Eq",
W "λ" `Or` W "fun",
W "δ" `Or` W "dfun",
W "ω" `Or` S "#",
S "" `Or` W "Type"]
||| `IsReserved str` is true if `R str` might actually show up in
||| the token stream
public export
IsReserved : String -> Type
IsReserved str = str `Elem` map resString reserved
export
tokens : Tokenizer TokenW
tokens = choice $
map skip [pred isWhitespace,
lineComment (exact "--" <+> reject symCont),
blockComment (exact "{-") (exact "-}")] <+>
map syntax ["(", ")", "[", "]", ",", "{", "}", "."] <+>
[match (exact "`{" <+> reject (is '-')) (resVal "`{")] <+>
map resSym ["@", ":"] <+>
map (uncurry uresSym)
[("·", "%"), ("","->"), ("×", "**"), ("", "=="), ("", "::")] <+>
map resWord ["case", "case1", "caseω", "return", "of", "_"] <+>
map (uncurry uresWord) [("λ","fun"), ("δ","dfun"), ("caseω", "case#")] <+>
[resWord "ω", match (resSymL "#") (resVal "ω"),
universe, resSym "", match (resWordL "Type") (resVal "")] <+>
[universe] <+> -- ★ᵢ takes precedence over bare ★
map resTokenizer reserved <+>
[nat, string, tag, name]
export

326
lib/Quox/Parser.idr Normal file
View file

@ -0,0 +1,326 @@
module Quox.Parser
import public Quox.Syntax.Qty.Three
import public Quox.Syntax
import public Quox.Lexer
import Data.Fin
import Data.Vect
import public Text.Parser
import Derive.Prelude
%hide TT.Name
%default total
%language ElabReflection
public export
0 Grammar : Bool -> Type -> Type
Grammar = Core.Grammar () Token
%hide Core.Grammar
public export
0 BName : Type
BName = Maybe BaseName
public export
0 PUniverse : Type
PUniverse = Nat
public export
0 PQty : Type
PQty = Three
namespace PDim
public export
data PDim = K DimConst | V BaseName
%runElab derive "PDim" [Eq, Ord, Show]
namespace PTerm
mutual
||| terms out of the parser with BVs and bidirectionality still tangled up
public export
data PTerm =
TYPE Nat
| Pi PQty BName PTerm PTerm
| Lam BName PTerm
| (:@) PTerm PTerm
| Sig BName PTerm PTerm
| Pair PTerm PTerm
| Case PQty PTerm (BName, PTerm) (PCaseBody)
| Enum (List TagVal)
| Tag TagVal
| Eq (BName, PTerm) PTerm PTerm
| DLam BName PTerm
| (:%) PTerm PDim
| V Name
| (:#) PTerm PTerm
public export
data PCaseBody =
CasePair (BName, BName) PTerm
| CaseEnum (List (TagVal, PTerm))
%runElab deriveMutual ["PTerm", "PCaseBody"] [Eq, Ord, Show]
private
data PArg = T PTerm | D PDim
%runElab derive "PArg" [Eq, Ord, Show]
private
apply : PTerm -> List PArg -> PTerm
apply = foldl $ \f, x => case x of T x => f :@ x; D p => f :% p
private
annotate : PTerm -> Maybe PTerm -> PTerm
annotate s a = maybe s (s :#) a
export
res : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
res str = terminal "expecting \"\{str}\"" $
\x => guard $ x == R str
export
resC : (str : String) -> (0 _ : IsReserved str) => Grammar True ()
resC str = do res str; commit
parameters {default True commit : Bool}
private
maybeCommit : Grammar False ()
maybeCommit = when commit Core.commit
export
betweenR : {c : Bool} -> (op, cl : String) ->
(0 _ : IsReserved op) => (0 _ : IsReserved cl) =>
Grammar c a -> Grammar True a
betweenR o c p = res o *> maybeCommit *> p <* res c <* maybeCommit
export
parens, bracks, braces : {c : Bool} -> Grammar c a -> Grammar True a
parens = betweenR "(" ")"
bracks = betweenR "[" "]"
braces = betweenR "{" "}"
export
commaSep : {c : Bool} -> Grammar c a -> Grammar False (List a)
commaSep = sepEndBy (res ",")
-- don't commit because of the possible terminating ","
export
semiSep : {c : Bool} -> Grammar c a -> Grammar False (List a)
semiSep = sepEndBy (res ";")
export
commaSep1 : {c : Bool} -> Grammar c a -> Grammar c (List1 a)
commaSep1 = sepEndBy1 (res ",")
export
darr : Grammar True ()
darr = resC ""
export
name : Grammar True Name
name = terminal "expecting name" $
\case I i => Just i; _ => Nothing
export
baseName : Grammar True BaseName
baseName = terminal "expecting unqualified name" $
\case I i => guard (null i.mods) $> i.base
_ => Nothing
export
nat : Grammar True Nat
nat = terminal "expecting natural number" $
\case N n => pure n; _ => Nothing
export
string : Grammar True String
string = terminal "expecting string literal" $
\case S s => pure s; _ => Nothing
export
tag : Grammar True String
tag = terminal "expecting tag constructor" $
\case T t => pure t; _ => Nothing
export
bareTag : Grammar True String
bareTag = string <|> [|toDots name|]
export
universe : Grammar True PUniverse
universe = terminal "expecting type universe" $
\case TYPE u => Just u; _ => Nothing
export
bname : Grammar True BName
bname = Nothing <$ res "_"
<|> [|Just baseName|]
export
zeroOne : (zero, one : a) -> Grammar True a
zeroOne zero one = terminal "expecting zero or one" $
\case N 0 => Just zero; N 1 => Just one; _ => Nothing
export covering
qty : Grammar True PQty
qty = zeroOne Zero One
<|> Any <$ res "ω"
<|> parens qty
private covering
qtys : Grammar False (List PQty)
qtys = option [] [|toList $ some qty <* res "·"|]
export
dimConst : Grammar True DimConst
dimConst = zeroOne Zero One
export covering
dim : Grammar True PDim
dim = [|V baseName|]
<|> [|K dimConst|]
<|> parens dim
private
0 PBinderHead : Nat -> Type
PBinderHead n = (Vect n PQty, BName, PTerm)
private
toVect : List a -> (n ** Vect n a)
toVect [] = (_ ** [])
toVect (x :: xs) = (_ ** x :: snd (toVect xs))
private
0 MakeBinder : Nat -> Type
MakeBinder n = (String, PBinderHead n -> PTerm -> PTerm)
private
makePi : MakeBinder 1
makePi = ("", \([pi], x, s) => Pi pi x s)
private
makeSig : MakeBinder 0
makeSig = ("×", \([], x, s) => Sig x s)
private
plural : Nat -> a -> a -> a
plural 1 s p = s
plural _ s p = p
private
makeBinder : {m, n : Nat} -> MakeBinder m -> PBinderHead n -> PTerm ->
Grammar False PTerm
makeBinder (str, f) h t =
case decEq m n of
Yes Refl => pure $ f h t
No _ => fatalError
"'\{str}' expects \{show m} quantit\{plural m "y" "ies"}, got \{show n}"
private
binderInfix : Grammar True (n ** MakeBinder n)
binderInfix = res "" $> (1 ** makePi)
<|> res "×" $> (0 ** makeSig)
private
lamIntro : Grammar True (BName -> PTerm -> PTerm)
lamIntro = Lam <$ resC "λ"
<|> DLam <$ resC "δ"
private covering
caseIntro : Grammar True PQty
caseIntro = resC "case1" $> One
<|> resC "caseω" $> Any
<|> resC "case" *> qty <* resC "·"
private
optNameBinder : Grammar False BName
optNameBinder = [|join $ optional $ bname <* darr|]
mutual
export covering
term : Grammar True PTerm
term = lamTerm
<|> caseTerm
<|> bindTerm
<|> [|annotate infixEqTerm (optional $ resC "" *> term)|]
private covering
lamTerm : Grammar True PTerm
lamTerm = flip . foldr <$> lamIntro <*> some bname <* darr <*> term
private covering
caseTerm : Grammar True PTerm
caseTerm =
[|Case caseIntro term
(resC "return" *> optBinderTerm)
(resC "of" *> braces caseBody)|]
private covering
caseBody : Grammar False PCaseBody
caseBody = [|CasePair (pairPat <* darr) (term <* optSemi)|]
<|> [|CaseEnum $ semiSep [|MkPair tag (darr *> term)|]|]
where
optSemi = ignore $ optional $ res ";"
pairPat = parens [|MkPair bname (resC "," *> bname)|]
private covering
bindTerm : Grammar True PTerm
bindTerm = do
bnd <- binderHead
inf <- binderInfix
body <- term
makeBinder (snd inf) (snd bnd) body
private covering
infixEqTerm : Grammar True PTerm
infixEqTerm = do
l <- appTerm
rty <- optional [|MkPair (resC "" *> term) (resC ":" *> appTerm)|]
pure $ maybe l (\rty => Eq (Nothing, snd rty) l (fst rty)) rty
private covering
appTerm : Grammar True PTerm
appTerm = resC "" *> [|TYPE nat|]
<|> resC "Eq" *> [|Eq (bracks optBinderTerm) aTerm aTerm|]
<|> [|apply aTerm (many appArg)|]
private covering
appArg : Grammar True PArg
appArg = [|D $ resC "@" *> dim|]
<|> [|T aTerm|]
private covering
aTerm : Grammar True PTerm
aTerm = [|Enum $ braces $ commaSep bareTag|]
<|> [|TYPE universe|]
<|> [|V name|]
<|> [|Tag tag|]
<|> foldr1 Pair <$> parens (commaSep1 term)
private covering
binderHead : Grammar True (n ** PBinderHead n)
binderHead = parens {commit = False} $ do
qs <- [|toVect qtys|]
name <- bname
resC ":"
ty <- term
pure (_ ** (qs.snd, name, ty))
private covering
optBinderTerm : Grammar True (BName, PTerm)
optBinderTerm = [|MkPair optNameBinder term|]

View file

@ -96,7 +96,7 @@ mutual
||| pair destruction
|||
||| `CasePair 𝜋 𝑒 ([𝑟], 𝐴) ([𝑥, 𝑦], 𝑡)` is
||| `𝐜𝐚𝐬𝐞 𝜋 · 𝑒 𝐫𝐞𝐭𝐮𝐫𝐧 𝑟𝐴 𝐨𝐟 { (𝑥, 𝑦). 𝑡 }`
||| `𝐜𝐚𝐬𝐞 𝜋 · 𝑒 𝐫𝐞𝐭𝐮𝐫𝐧 𝑟𝐴 𝐨𝐟 { (𝑥, 𝑦) 𝑡 }`
CasePair : (qty : q) -> (pair : Elim q d n) ->
(ret : ScopeTerm q d n) ->
(body : ScopeTermN 2 q d n) ->

View file

@ -11,10 +11,11 @@ import Data.Vect
private %inline
typeD, arrowD, timesD, lamD, eqndD, dlamD, annD :
typeD, arrowD, darrowD, timesD, lamD, eqndD, dlamD, annD :
Pretty.HasEnv m => m (Doc HL)
typeD = hlF Syntax $ ifUnicode "" "Type"
arrowD = hlF Syntax $ ifUnicode "" "->"
darrowD = hlF Syntax $ ifUnicode "" "=>"
timesD = hlF Syntax $ ifUnicode "×" "**"
lamD = hlF Syntax $ ifUnicode "λ" "fun"
eqndD = hlF Syntax $ ifUnicode "" "=="
@ -22,11 +23,10 @@ dlamD = hlF Syntax $ ifUnicode "δ" "dfun"
annD = hlF Syntax $ ifUnicode "" "::"
private %inline
eqD, colonD, commaD, dotD, caseD, returnD, ofD : Doc HL
eqD, colonD, commaD, caseD, returnD, ofD : Doc HL
eqD = hl Syntax "Eq"
colonD = hl Syntax ":"
commaD = hl Syntax ","
dotD = hl Delim "."
caseD = hl Syntax "case"
ofD = hl Syntax "of"
returnD = hl Syntax "return"
@ -48,7 +48,7 @@ prettyArm : PrettyHL a => Pretty.HasEnv m =>
BinderSort -> List BaseName -> Doc HL -> a -> m (Doc HL)
prettyArm sort xs pat body = do
body <- withPrec Outer $ unders sort xs $ prettyM body
pure $ hang 2 $ sep [pat <+> dotD, body]
pure $ hang 2 $ sep [pat <++> !darrowD, body]
export
prettyLams : PrettyHL a => Pretty.HasEnv m =>
@ -96,7 +96,7 @@ prettyCase pi elim r ret arms = do
-- [fixme] put delimiters around tags that aren't simple names
export
prettyTag : TagVal -> Doc HL
prettyTag t = hl Tag $ "`" <+> fromString t
prettyTag t = hl Tag $ "'" <+> fromString t
parameters (showSubsts : Bool)
@ -117,7 +117,7 @@ parameters (showSubsts : Bool)
let GotPairs {init, last, _} = getPairs' [< s] t in
prettyTuple $ toList $ init :< last
prettyM (Enum tags) =
pure $ delims "`{" "}" . aseparate comma $ map prettyTag $
pure $ delims "{" "}" . aseparate comma $ map prettyTag $
Prelude.toList tags
prettyM (Tag t) =
pure $ prettyTag t

View file

@ -35,4 +35,5 @@ modules =
Quox.Name,
Quox.Typing,
Quox.Typechecker,
Quox.Lexer
Quox.Lexer,
Quox.Parser