remove lex/parse stuff for now

This commit is contained in:
rhiannon morris 2023-01-08 14:58:18 +01:00
parent 44c4aea06c
commit 8443b2f6d8
9 changed files with 10 additions and 10 deletions

102
lib/on-hold/Quox/Lexer.idr Normal file
View file

@ -0,0 +1,102 @@
module Quox.Lexer
import public Quox.Token
import Data.String
import Data.String.Extra
import public Text.Lexer
import public Text.Lexer.Tokenizer
import Control.Monad.Either
import Generics.Derive
%default total
%language ElabReflection
public export
record Error where
constructor Err
reason : StopReason
line, col : Int
char : Char
nameStart = pred $ \c => isAlpha c || c == '_'
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
name = nameStart <+> many nameCont <+> reject nameCont
wild = is '_' <+> reject nameCont
%hide Text.Lexer.symbol
symbol = is '\'' <+> name
decimal = some digit <+> reject nameCont
natToNumber : Nat -> Number
natToNumber 0 = Zero
natToNumber 1 = One
natToNumber k = Other k
skip : Lexer -> Tokenizer (Maybe a)
skip lex = match lex $ const Nothing
simple : Char -> a -> Tokenizer (Maybe a)
simple ch = match (is ch) . const . Just
keyword : String -> Keyword -> Tokenizer (Maybe Token)
keyword str = match (exact str <+> reject nameCont) . const . Just . K
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
match lex f = Tokenizer.match lex (Just . f)
%hide Tokenizer.match
tokens : Tokenizer (Maybe Token)
tokens = choice [
skip $ lineComment $ exact "--",
skip $ blockComment (exact "{-") (exact "-}"),
skip spaces,
simple '(' $ P LParen, simple ')' $ P RParen,
simple '[' $ P LSquare, simple ']' $ P RSquare,
simple '{' $ P LBrace, simple '}' $ P RBrace,
simple ',' $ P Comma,
simple '' $ P DblColon,
simple ':' $ P Colon, -- needs to be after '::'
simple '.' $ P Dot,
simple '' $ P Arrow,
simple '' $ P DblArrow,
simple '×' $ P Times,
simple '' $ P Triangle,
match wild $ const $ P Wild,
keyword "λ" Lam,
keyword "let" Let, keyword "in" In,
keyword "case" Case, keyword "of" Of,
keyword "ω" Omega,
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
match name $ Name,
match symbol $ Symbol . assert_total strTail,
match decimal $ N . natToNumber . cast,
match (is '' <+> decimal) $ TYPE . cast . assert_total strTail
]
export
lex : String -> Either Error (List BToken)
lex str =
let (res, (reason, line, col, str)) = lex tokens str in
case reason of
EndInput => Right $ mapMaybe sequence res
_ => let char = assert_total strIndex str 0 in
Left $ Err {reason, line, col, char}

159
lib/on-hold/Quox/Parser.idr Normal file
View file

@ -0,0 +1,159 @@
module Quox.Parser
import Quox.Syntax
import Quox.Token
import Quox.Lexer
import Data.Maybe
import Data.SnocVect
import Data.SnocList
import Text.Parser
%default total
public export
Vars : Nat -> Type
Vars n = SnocVect n String
public export
Grammar : Bool -> Type -> Type
Grammar = Core.Grammar () Token
%hide Core.Grammar
public export
data Error
= Lex (Lexer.Error)
| Parse (List1 (ParsingError Token))
| Leftover (List BToken)
%hide Lexer.Error
public export
parseAll : {c : Bool} -> Grammar c a -> List BToken -> Either Error a
parseAll grm input =
case parse grm input of
Right (x, []) => Right x
Right (x, rest) => Left $ Leftover rest
Left errs => Left $ Parse errs
public export
lexParseAll : {c : Bool} -> Grammar c a -> String -> Either Error a
lexParseAll grm = lex' >=> parseAll grm
where lex' : String -> Either Error (List BToken)
lex' = bimap Lex id . lex
export
punc : Punc -> Grammar True ()
punc p = terminal (show p) $ \case
P p' => if p == p' then Just () else Nothing
_ => Nothing
export
keyword : Keyword -> Grammar True ()
keyword k = terminal (show k) $ \case
K k' => if k == k' then Just () else Nothing
_ => Nothing
export
between : Punc -> Punc -> Grammar True a -> Grammar True a
between opener closer inner = punc opener *> inner <* punc closer
export
parens, squares, braces : Grammar True a -> Grammar True a
parens = between LParen RParen
squares = between LSquare RSquare
braces = between LBrace RBrace
export
number : Grammar True Nat
number = terminal "number" $ \case
N Zero => Just 0
N One => Just 1
N (Other k) => Just k
_ => Nothing
export
universe : Grammar True Nat
universe = terminal "universe" $ \case TYPE k => Just k; _ => Nothing
export
zero, one, omega : Grammar True ()
zero = terminal "0" $ \case N Zero => Just (); _ => Nothing
one = terminal "1" $ \case N One => Just (); _ => Nothing
omega = terminal "ω" $ \case K Omega => Just (); _ => Nothing
export
quantity : Grammar True Qty
quantity = Zero <$ zero <|> One <$ one <|> Any <$ omega
find1 : Eq a => SnocVect k a -> a -> Maybe (Var k)
find1 [<] y = Nothing
find1 (sx :< x) y = if x == y then Just VZ else VS <$> find1 sx y
find : Vars k -> Name -> Maybe (Var k)
find vs (MakeName [<] (UN y)) = find1 vs y
find _ _ = Nothing
export
checkAvoid1 : Vars n -> String -> Grammar False ()
checkAvoid1 avoid y =
when (isJust $ find1 avoid y) $
fail "wrong type of bound variable: \{show y}"
export
checkAvoid : Vars n -> Name -> Grammar False ()
checkAvoid avoid (MakeName [<] (UN y)) = checkAvoid1 avoid y
checkAvoid _ _ = pure ()
export
bound : (what : String) -> (bound : Vars k) -> (avoid : Vars n) ->
Grammar True (Var k)
bound what vs avoid = do
x <- terminal "bound \{what} variable" $ \case Name x => Just x; _ => Nothing
checkAvoid1 avoid x
maybe (fail "not in scope: \{x}") pure $ find1 vs x
export
sname : Grammar True String
sname = terminal "simple name" $ \case Name x => pure x; _ => Nothing
export
qname : Grammar True Name
qname = do
parts <- sepBy1 (punc Dot) sname
pure $ MakeName {mods = cast $ init parts, base = UN $ last parts}
export
nameWith : (bound : Vars k) -> (avoid : Vars n) ->
Grammar True (Either (Var k) Name)
nameWith bound avoid = do
y <- qname
checkAvoid avoid y
pure $ maybe (Right y) Left $ find bound y
export
dimension : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Dim d)
dimension dvars tvars =
K Zero <$ zero
<|> K One <$ one
<|> B <$> bound "dimension" {bound = dvars, avoid = tvars}
mutual
export
term : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Term d n)
term dvars tvars =
E <$> squares (elim {dvars, tvars})
<|> TYPE . U <$> universe
export
elim : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Elim d n)
elim dvars tvars =
either B F <$> nameWith {bound = tvars, avoid = dvars}

View file

@ -0,0 +1,49 @@
module Quox.Token
import Generics.Derive
import Text.Lexer
%default total
%language ElabReflection
public export
data Punc
= LParen | RParen
| LSquare | RSquare
| LBrace | RBrace
| Comma
| Colon | DblColon
| Dot
| Arrow | DblArrow
| Times | Triangle
| Wild
%runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show]
public export
data Keyword
= Lam | Let | In | Case | Of | Omega
| Pi | Sigma | W
%runElab derive "Keyword" [Generic, Meta, Eq, Ord, DecEq, Show]
||| zero and one are separate because they are
||| quantity & dimension constants
public export
data Number = Zero | One | Other Nat
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
public export
data Token
= P Punc
| K Keyword
| Name String | Symbol String
| N Number | TYPE Nat
%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show]
public export
BToken : Type
BToken = WithBounds Token

View file

@ -0,0 +1,140 @@
module Quox.Unicode
import Generics.Derive
%default total
%language ElabReflection
namespace Letter
public export
data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other
%runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Mark
public export
data Mark = NonSpacing | SpacingCombining | Enclosing
%runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Number
public export
data Number = Decimal | Letter | Other
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Punctuation
public export
data Punctuation = Connector | Dash | Open | Close
| InitialQuote | FinalQuote | Other
%runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Symbol
public export
data Symbol = Math | Currency | Modifier | Other
%runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Separator
public export
data Separator = Space | Line | Paragraph
%runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Other
public export
data Other = Control | Format | Surrogate | PrivateUse | NotAssigned
%runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show]
public export
data GeneralCategory
= Letter Letter
| Mark Mark
| Number Number
| Punctuation Punctuation
| Symbol Symbol
| Separator Separator
| Other Other
%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show]
private
%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
prim__genCat : Char -> String
export
genCat : Char -> GeneralCategory
genCat ch = assert_total $
case prim__genCat ch of
"Lu" => Letter Uppercase
"Ll" => Letter Lowercase
"Lt" => Letter Titlecase
"Lm" => Letter Modifier
"Lo" => Letter Other
"Mn" => Mark NonSpacing
"Mc" => Mark SpacingCombining
"Me" => Mark Enclosing
"Nd" => Number Decimal
"Nl" => Number Letter
"No" => Number Other
"Pc" => Punctuation Connector
"Pd" => Punctuation Dash
"Ps" => Punctuation Open
"Pe" => Punctuation Close
"Pi" => Punctuation InitialQuote
"Pf" => Punctuation FinalQuote
"Po" => Punctuation Other
"Sm" => Symbol Math
"Sc" => Symbol Currency
"Sk" => Symbol Modifier
"So" => Symbol Other
"Zs" => Separator Space
"Zl" => Separator Line
"Zp" => Separator Paragraph
"Cc" => Other Control
"Cf" => Other Format
"Cs" => Other Surrogate
"Co" => Other PrivateUse
"Cn" => Other NotAssigned
export
isIdStart : Char -> Bool
isIdStart ch =
case genCat ch of
Letter _ => True
Number _ => not ('0' <= ch && ch <= '9')
_ => False
export
isIdCont : Char -> Bool
isIdCont ch =
isIdStart ch || ch == '\'' ||
case genCat ch of
Mark _ => True
Number _ => True
_ => False
export
isIdConnector : Char -> Bool
isIdConnector ch =
case genCat ch of Punctuation Connector => True; _ => False
export
isSymChar : Char -> Bool
isSymChar ch =
case genCat ch of
Symbol _ => True
Punctuation Dash => True
Punctuation Other => True
_ => False
export
isWhitespace : Char -> Bool
isWhitespace ch =
ch == '\t' || ch == '\r' || ch == '\n' ||
case genCat ch of Separator _ => True; _ => False
export
%foreign "scheme:string-normalize-nfc"
normalizeNfc : String -> String