remove lex/parse stuff for now
This commit is contained in:
parent
44c4aea06c
commit
8443b2f6d8
9 changed files with 10 additions and 10 deletions
102
lib/on-hold/Quox/Lexer.idr
Normal file
102
lib/on-hold/Quox/Lexer.idr
Normal file
|
@ -0,0 +1,102 @@
|
|||
module Quox.Lexer
|
||||
|
||||
import public Quox.Token
|
||||
|
||||
import Data.String
|
||||
import Data.String.Extra
|
||||
import public Text.Lexer
|
||||
import public Text.Lexer.Tokenizer
|
||||
import Control.Monad.Either
|
||||
import Generics.Derive
|
||||
|
||||
%default total
|
||||
%language ElabReflection
|
||||
|
||||
|
||||
public export
|
||||
record Error where
|
||||
constructor Err
|
||||
reason : StopReason
|
||||
line, col : Int
|
||||
char : Char
|
||||
|
||||
|
||||
|
||||
nameStart = pred $ \c => isAlpha c || c == '_'
|
||||
nameCont = pred $ \c => isAlphaNum c || c == '_' || c == '\''
|
||||
|
||||
name = nameStart <+> many nameCont <+> reject nameCont
|
||||
|
||||
wild = is '_' <+> reject nameCont
|
||||
|
||||
%hide Text.Lexer.symbol
|
||||
symbol = is '\'' <+> name
|
||||
|
||||
decimal = some digit <+> reject nameCont
|
||||
|
||||
|
||||
natToNumber : Nat -> Number
|
||||
natToNumber 0 = Zero
|
||||
natToNumber 1 = One
|
||||
natToNumber k = Other k
|
||||
|
||||
|
||||
skip : Lexer -> Tokenizer (Maybe a)
|
||||
skip lex = match lex $ const Nothing
|
||||
|
||||
simple : Char -> a -> Tokenizer (Maybe a)
|
||||
simple ch = match (is ch) . const . Just
|
||||
|
||||
keyword : String -> Keyword -> Tokenizer (Maybe Token)
|
||||
keyword str = match (exact str <+> reject nameCont) . const . Just . K
|
||||
|
||||
choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
|
||||
choice (t :: ts) = foldl (\a, b => a <|> b) t ts
|
||||
|
||||
match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
|
||||
match lex f = Tokenizer.match lex (Just . f)
|
||||
%hide Tokenizer.match
|
||||
|
||||
|
||||
tokens : Tokenizer (Maybe Token)
|
||||
tokens = choice [
|
||||
skip $ lineComment $ exact "--",
|
||||
skip $ blockComment (exact "{-") (exact "-}"),
|
||||
skip spaces,
|
||||
|
||||
simple '(' $ P LParen, simple ')' $ P RParen,
|
||||
simple '[' $ P LSquare, simple ']' $ P RSquare,
|
||||
simple '{' $ P LBrace, simple '}' $ P RBrace,
|
||||
simple ',' $ P Comma,
|
||||
simple '∷' $ P DblColon,
|
||||
simple ':' $ P Colon, -- needs to be after '::'
|
||||
simple '.' $ P Dot,
|
||||
|
||||
simple '→' $ P Arrow,
|
||||
simple '⇒' $ P DblArrow,
|
||||
simple '×' $ P Times,
|
||||
simple '⊲' $ P Triangle,
|
||||
match wild $ const $ P Wild,
|
||||
|
||||
keyword "λ" Lam,
|
||||
keyword "let" Let, keyword "in" In,
|
||||
keyword "case" Case, keyword "of" Of,
|
||||
keyword "ω" Omega,
|
||||
keyword "Π" Pi, keyword "Σ" Sigma, keyword "W" W,
|
||||
|
||||
match name $ Name,
|
||||
match symbol $ Symbol . assert_total strTail,
|
||||
|
||||
match decimal $ N . natToNumber . cast,
|
||||
match (is '★' <+> decimal) $ TYPE . cast . assert_total strTail
|
||||
]
|
||||
|
||||
|
||||
export
|
||||
lex : String -> Either Error (List BToken)
|
||||
lex str =
|
||||
let (res, (reason, line, col, str)) = lex tokens str in
|
||||
case reason of
|
||||
EndInput => Right $ mapMaybe sequence res
|
||||
_ => let char = assert_total strIndex str 0 in
|
||||
Left $ Err {reason, line, col, char}
|
159
lib/on-hold/Quox/Parser.idr
Normal file
159
lib/on-hold/Quox/Parser.idr
Normal file
|
@ -0,0 +1,159 @@
|
|||
module Quox.Parser
|
||||
|
||||
import Quox.Syntax
|
||||
import Quox.Token
|
||||
import Quox.Lexer
|
||||
|
||||
import Data.Maybe
|
||||
import Data.SnocVect
|
||||
import Data.SnocList
|
||||
import Text.Parser
|
||||
|
||||
%default total
|
||||
|
||||
|
||||
public export
|
||||
Vars : Nat -> Type
|
||||
Vars n = SnocVect n String
|
||||
|
||||
public export
|
||||
Grammar : Bool -> Type -> Type
|
||||
Grammar = Core.Grammar () Token
|
||||
%hide Core.Grammar
|
||||
|
||||
public export
|
||||
data Error
|
||||
= Lex (Lexer.Error)
|
||||
| Parse (List1 (ParsingError Token))
|
||||
| Leftover (List BToken)
|
||||
%hide Lexer.Error
|
||||
|
||||
|
||||
public export
|
||||
parseAll : {c : Bool} -> Grammar c a -> List BToken -> Either Error a
|
||||
parseAll grm input =
|
||||
case parse grm input of
|
||||
Right (x, []) => Right x
|
||||
Right (x, rest) => Left $ Leftover rest
|
||||
Left errs => Left $ Parse errs
|
||||
|
||||
public export
|
||||
lexParseAll : {c : Bool} -> Grammar c a -> String -> Either Error a
|
||||
lexParseAll grm = lex' >=> parseAll grm
|
||||
where lex' : String -> Either Error (List BToken)
|
||||
lex' = bimap Lex id . lex
|
||||
|
||||
|
||||
|
||||
export
|
||||
punc : Punc -> Grammar True ()
|
||||
punc p = terminal (show p) $ \case
|
||||
P p' => if p == p' then Just () else Nothing
|
||||
_ => Nothing
|
||||
|
||||
export
|
||||
keyword : Keyword -> Grammar True ()
|
||||
keyword k = terminal (show k) $ \case
|
||||
K k' => if k == k' then Just () else Nothing
|
||||
_ => Nothing
|
||||
|
||||
export
|
||||
between : Punc -> Punc -> Grammar True a -> Grammar True a
|
||||
between opener closer inner = punc opener *> inner <* punc closer
|
||||
|
||||
export
|
||||
parens, squares, braces : Grammar True a -> Grammar True a
|
||||
parens = between LParen RParen
|
||||
squares = between LSquare RSquare
|
||||
braces = between LBrace RBrace
|
||||
|
||||
|
||||
export
|
||||
number : Grammar True Nat
|
||||
number = terminal "number" $ \case
|
||||
N Zero => Just 0
|
||||
N One => Just 1
|
||||
N (Other k) => Just k
|
||||
_ => Nothing
|
||||
|
||||
export
|
||||
universe : Grammar True Nat
|
||||
universe = terminal "universe" $ \case TYPE k => Just k; _ => Nothing
|
||||
|
||||
export
|
||||
zero, one, omega : Grammar True ()
|
||||
zero = terminal "0" $ \case N Zero => Just (); _ => Nothing
|
||||
one = terminal "1" $ \case N One => Just (); _ => Nothing
|
||||
omega = terminal "ω" $ \case K Omega => Just (); _ => Nothing
|
||||
|
||||
export
|
||||
quantity : Grammar True Qty
|
||||
quantity = Zero <$ zero <|> One <$ one <|> Any <$ omega
|
||||
|
||||
|
||||
find1 : Eq a => SnocVect k a -> a -> Maybe (Var k)
|
||||
find1 [<] y = Nothing
|
||||
find1 (sx :< x) y = if x == y then Just VZ else VS <$> find1 sx y
|
||||
|
||||
find : Vars k -> Name -> Maybe (Var k)
|
||||
find vs (MakeName [<] (UN y)) = find1 vs y
|
||||
find _ _ = Nothing
|
||||
|
||||
|
||||
export
|
||||
checkAvoid1 : Vars n -> String -> Grammar False ()
|
||||
checkAvoid1 avoid y =
|
||||
when (isJust $ find1 avoid y) $
|
||||
fail "wrong type of bound variable: \{show y}"
|
||||
|
||||
export
|
||||
checkAvoid : Vars n -> Name -> Grammar False ()
|
||||
checkAvoid avoid (MakeName [<] (UN y)) = checkAvoid1 avoid y
|
||||
checkAvoid _ _ = pure ()
|
||||
|
||||
export
|
||||
bound : (what : String) -> (bound : Vars k) -> (avoid : Vars n) ->
|
||||
Grammar True (Var k)
|
||||
bound what vs avoid = do
|
||||
x <- terminal "bound \{what} variable" $ \case Name x => Just x; _ => Nothing
|
||||
checkAvoid1 avoid x
|
||||
maybe (fail "not in scope: \{x}") pure $ find1 vs x
|
||||
|
||||
export
|
||||
sname : Grammar True String
|
||||
sname = terminal "simple name" $ \case Name x => pure x; _ => Nothing
|
||||
|
||||
export
|
||||
qname : Grammar True Name
|
||||
qname = do
|
||||
parts <- sepBy1 (punc Dot) sname
|
||||
pure $ MakeName {mods = cast $ init parts, base = UN $ last parts}
|
||||
|
||||
export
|
||||
nameWith : (bound : Vars k) -> (avoid : Vars n) ->
|
||||
Grammar True (Either (Var k) Name)
|
||||
nameWith bound avoid = do
|
||||
y <- qname
|
||||
checkAvoid avoid y
|
||||
pure $ maybe (Right y) Left $ find bound y
|
||||
|
||||
|
||||
export
|
||||
dimension : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Dim d)
|
||||
dimension dvars tvars =
|
||||
K Zero <$ zero
|
||||
<|> K One <$ one
|
||||
<|> B <$> bound "dimension" {bound = dvars, avoid = tvars}
|
||||
|
||||
|
||||
mutual
|
||||
export
|
||||
term : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Term d n)
|
||||
term dvars tvars =
|
||||
E <$> squares (elim {dvars, tvars})
|
||||
<|> TYPE . U <$> universe
|
||||
|
||||
export
|
||||
elim : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Elim d n)
|
||||
elim dvars tvars =
|
||||
either B F <$> nameWith {bound = tvars, avoid = dvars}
|
49
lib/on-hold/Quox/Token.idr
Normal file
49
lib/on-hold/Quox/Token.idr
Normal file
|
@ -0,0 +1,49 @@
|
|||
module Quox.Token
|
||||
|
||||
import Generics.Derive
|
||||
import Text.Lexer
|
||||
|
||||
%default total
|
||||
%language ElabReflection
|
||||
|
||||
|
||||
public export
|
||||
data Punc
|
||||
= LParen | RParen
|
||||
| LSquare | RSquare
|
||||
| LBrace | RBrace
|
||||
| Comma
|
||||
| Colon | DblColon
|
||||
| Dot
|
||||
| Arrow | DblArrow
|
||||
| Times | Triangle
|
||||
| Wild
|
||||
%runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
public export
|
||||
data Keyword
|
||||
= Lam | Let | In | Case | Of | Omega
|
||||
| Pi | Sigma | W
|
||||
%runElab derive "Keyword" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
||| zero and one are separate because they are
|
||||
||| quantity & dimension constants
|
||||
public export
|
||||
data Number = Zero | One | Other Nat
|
||||
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
public export
|
||||
data Token
|
||||
= P Punc
|
||||
| K Keyword
|
||||
| Name String | Symbol String
|
||||
| N Number | TYPE Nat
|
||||
%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
public export
|
||||
BToken : Type
|
||||
BToken = WithBounds Token
|
140
lib/on-hold/Quox/Unicode.idr
Normal file
140
lib/on-hold/Quox/Unicode.idr
Normal file
|
@ -0,0 +1,140 @@
|
|||
module Quox.Unicode
|
||||
|
||||
import Generics.Derive
|
||||
|
||||
|
||||
%default total
|
||||
%language ElabReflection
|
||||
|
||||
|
||||
namespace Letter
|
||||
public export
|
||||
data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other
|
||||
%runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Mark
|
||||
public export
|
||||
data Mark = NonSpacing | SpacingCombining | Enclosing
|
||||
%runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Number
|
||||
public export
|
||||
data Number = Decimal | Letter | Other
|
||||
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Punctuation
|
||||
public export
|
||||
data Punctuation = Connector | Dash | Open | Close
|
||||
| InitialQuote | FinalQuote | Other
|
||||
%runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Symbol
|
||||
public export
|
||||
data Symbol = Math | Currency | Modifier | Other
|
||||
%runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Separator
|
||||
public export
|
||||
data Separator = Space | Line | Paragraph
|
||||
%runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Other
|
||||
public export
|
||||
data Other = Control | Format | Surrogate | PrivateUse | NotAssigned
|
||||
%runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
public export
|
||||
data GeneralCategory
|
||||
= Letter Letter
|
||||
| Mark Mark
|
||||
| Number Number
|
||||
| Punctuation Punctuation
|
||||
| Symbol Symbol
|
||||
| Separator Separator
|
||||
| Other Other
|
||||
%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
private
|
||||
%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
|
||||
prim__genCat : Char -> String
|
||||
|
||||
export
|
||||
genCat : Char -> GeneralCategory
|
||||
genCat ch = assert_total $
|
||||
case prim__genCat ch of
|
||||
"Lu" => Letter Uppercase
|
||||
"Ll" => Letter Lowercase
|
||||
"Lt" => Letter Titlecase
|
||||
"Lm" => Letter Modifier
|
||||
"Lo" => Letter Other
|
||||
"Mn" => Mark NonSpacing
|
||||
"Mc" => Mark SpacingCombining
|
||||
"Me" => Mark Enclosing
|
||||
"Nd" => Number Decimal
|
||||
"Nl" => Number Letter
|
||||
"No" => Number Other
|
||||
"Pc" => Punctuation Connector
|
||||
"Pd" => Punctuation Dash
|
||||
"Ps" => Punctuation Open
|
||||
"Pe" => Punctuation Close
|
||||
"Pi" => Punctuation InitialQuote
|
||||
"Pf" => Punctuation FinalQuote
|
||||
"Po" => Punctuation Other
|
||||
"Sm" => Symbol Math
|
||||
"Sc" => Symbol Currency
|
||||
"Sk" => Symbol Modifier
|
||||
"So" => Symbol Other
|
||||
"Zs" => Separator Space
|
||||
"Zl" => Separator Line
|
||||
"Zp" => Separator Paragraph
|
||||
"Cc" => Other Control
|
||||
"Cf" => Other Format
|
||||
"Cs" => Other Surrogate
|
||||
"Co" => Other PrivateUse
|
||||
"Cn" => Other NotAssigned
|
||||
|
||||
|
||||
export
|
||||
isIdStart : Char -> Bool
|
||||
isIdStart ch =
|
||||
case genCat ch of
|
||||
Letter _ => True
|
||||
Number _ => not ('0' <= ch && ch <= '9')
|
||||
_ => False
|
||||
|
||||
export
|
||||
isIdCont : Char -> Bool
|
||||
isIdCont ch =
|
||||
isIdStart ch || ch == '\'' ||
|
||||
case genCat ch of
|
||||
Mark _ => True
|
||||
Number _ => True
|
||||
_ => False
|
||||
|
||||
export
|
||||
isIdConnector : Char -> Bool
|
||||
isIdConnector ch =
|
||||
case genCat ch of Punctuation Connector => True; _ => False
|
||||
|
||||
|
||||
export
|
||||
isSymChar : Char -> Bool
|
||||
isSymChar ch =
|
||||
case genCat ch of
|
||||
Symbol _ => True
|
||||
Punctuation Dash => True
|
||||
Punctuation Other => True
|
||||
_ => False
|
||||
|
||||
export
|
||||
isWhitespace : Char -> Bool
|
||||
isWhitespace ch =
|
||||
ch == '\t' || ch == '\r' || ch == '\n' ||
|
||||
case genCat ch of Separator _ => True; _ => False
|
||||
|
||||
|
||||
export
|
||||
%foreign "scheme:string-normalize-nfc"
|
||||
normalizeNfc : String -> String
|
Loading…
Add table
Add a link
Reference in a new issue