remove lex/parse stuff for now

2023-01-08 14:58:18 +01:00 · 2023-01-08 14:58:18 +01:00 · 8443b2f6d8
commit 8443b2f6d8
parent 44c4aea06c
9 changed files with 10 additions and 10 deletions
--- a/lib/on-hold/Quox/Lexer.idr
+++ b/lib/on-hold/Quox/Lexer.idr
@ -0,0 +1,102 @@
+module Quox.Lexer
+
+import public Quox.Token
+
+import Data.String
+import Data.String.Extra
+import public Text.Lexer
+import public Text.Lexer.Tokenizer
+import Control.Monad.Either
+import Generics.Derive
+
+%default total
+%language ElabReflection
+
+
+public export
+record Error where
+  constructor Err
+  reason    : StopReason
+  line, col : Int
+  char      : Char
+
+
+
+nameStart = pred $ \c => isAlpha c || c == '_'
+nameCont  = pred $ \c => isAlphaNum c || c == '_' || c == '\''
+
+name = nameStart <+> many nameCont <+> reject nameCont
+
+wild = is '_' <+> reject nameCont
+
+%hide Text.Lexer.symbol
+symbol = is '\'' <+> name
+
+decimal = some digit <+> reject nameCont
+
+
+natToNumber : Nat -> Number
+natToNumber 0 = Zero
+natToNumber 1 = One
+natToNumber k = Other k
+
+
+skip : Lexer -> Tokenizer (Maybe a)
+skip lex = match lex $ const Nothing
+
+simple : Char -> a -> Tokenizer (Maybe a)
+simple ch = match (is ch) . const . Just
+
+keyword : String -> Keyword -> Tokenizer (Maybe Token)
+keyword str = match (exact str <+> reject nameCont) . const . Just . K
+
+choice : (xs : List (Tokenizer a)) -> {auto 0 _ : NonEmpty xs} -> Tokenizer a
+choice (t :: ts) = foldl (\a, b => a <|> b) t ts
+
+match : Lexer -> (String -> a) -> Tokenizer (Maybe a)
+match lex f = Tokenizer.match lex (Just . f)
+%hide Tokenizer.match
+
+
+tokens : Tokenizer (Maybe Token)
+tokens = choice [
+  skip $ lineComment $ exact "--",
+  skip $ blockComment (exact "{-") (exact "-}"),
+  skip spaces,
+
+  simple '(' $ P LParen,  simple ')' $ P RParen,
+  simple '[' $ P LSquare, simple ']' $ P RSquare,
+  simple '{' $ P LBrace,  simple '}' $ P RBrace,
+  simple ',' $ P Comma,
+  simple '∷' $ P DblColon,
+  simple ':' $ P Colon, -- needs to be after '::'
+  simple '.' $ P Dot,
+
+  simple '→' $ P Arrow,
+  simple '⇒' $ P DblArrow,
+  simple '×' $ P Times,
+  simple '⊲' $ P Triangle,
+  match wild $ const $ P Wild,
+
+  keyword "λ"    Lam,
+  keyword "let"  Let,  keyword "in" In,
+  keyword "case" Case, keyword "of" Of,
+  keyword "ω"    Omega,
+  keyword "Π"    Pi,   keyword "Σ"  Sigma, keyword "W" W,
+
+  match name   $ Name,
+  match symbol $ Symbol . assert_total strTail,
+
+  match decimal $ N . natToNumber . cast,
+  match (is '★' <+> decimal) $ TYPE . cast . assert_total strTail
+]
+
+
+export
+lex : String -> Either Error (List BToken)
+lex str =
+  let (res, (reason, line, col, str)) = lex tokens str in
+  case reason of
+       EndInput => Right $ mapMaybe sequence res
+       _        => let char = assert_total strIndex str 0 in
+                   Left $ Err {reason, line, col, char}
--- a/lib/on-hold/Quox/Parser.idr
+++ b/lib/on-hold/Quox/Parser.idr
@ -0,0 +1,159 @@
+module Quox.Parser
+
+import Quox.Syntax
+import Quox.Token
+import Quox.Lexer
+
+import Data.Maybe
+import Data.SnocVect
+import Data.SnocList
+import Text.Parser
+
+%default total
+
+
+public export
+Vars : Nat -> Type
+Vars n = SnocVect n String
+
+public export
+Grammar : Bool -> Type -> Type
+Grammar = Core.Grammar () Token
+%hide Core.Grammar
+
+public export
+data Error
+= Lex (Lexer.Error)
+| Parse (List1 (ParsingError Token))
+| Leftover (List BToken)
+%hide Lexer.Error
+
+
+public export
+parseAll : {c : Bool} -> Grammar c a -> List BToken -> Either Error a
+parseAll grm input =
+  case parse grm input of
+       Right (x, [])   => Right x
+       Right (x, rest) => Left $ Leftover rest
+       Left  errs      => Left $ Parse errs
+
+public export
+lexParseAll : {c : Bool} -> Grammar c a -> String -> Either Error a
+lexParseAll grm = lex' >=> parseAll grm
+  where lex' : String -> Either Error (List BToken)
+        lex' = bimap Lex id . lex
+
+
+
+export
+punc : Punc -> Grammar True ()
+punc p = terminal (show p) $ \case
+  P p' => if p == p' then Just () else Nothing
+  _    => Nothing
+
+export
+keyword : Keyword -> Grammar True ()
+keyword k = terminal (show k) $ \case
+  K k' => if k == k' then Just () else Nothing
+  _    => Nothing
+
+export
+between : Punc -> Punc -> Grammar True a -> Grammar True a
+between opener closer inner = punc opener *> inner <* punc closer
+
+export
+parens, squares, braces : Grammar True a -> Grammar True a
+parens  = between LParen  RParen
+squares = between LSquare RSquare
+braces  = between LBrace  RBrace
+
+
+export
+number : Grammar True Nat
+number = terminal "number" $ \case
+  N Zero      => Just 0
+  N One       => Just 1
+  N (Other k) => Just k
+  _           => Nothing
+
+export
+universe : Grammar True Nat
+universe = terminal "universe" $ \case TYPE k => Just k; _ => Nothing
+
+export
+zero, one, omega : Grammar True ()
+zero  = terminal "0" $ \case N Zero  => Just (); _ => Nothing
+one   = terminal "1" $ \case N One   => Just (); _ => Nothing
+omega = terminal "ω" $ \case K Omega => Just (); _ => Nothing
+
+export
+quantity : Grammar True Qty
+quantity = Zero <$ zero <|> One <$ one <|> Any <$ omega
+
+
+find1 : Eq a => SnocVect k a -> a -> Maybe (Var k)
+find1 [<]       y = Nothing
+find1 (sx :< x) y = if x == y then Just VZ else VS <$> find1 sx y
+
+find : Vars k -> Name -> Maybe (Var k)
+find vs (MakeName [<] (UN y)) = find1 vs y
+find _  _                     = Nothing
+
+
+export
+checkAvoid1 : Vars n -> String -> Grammar False ()
+checkAvoid1 avoid y =
+  when (isJust $ find1 avoid y) $
+    fail "wrong type of bound variable: \{show y}"
+
+export
+checkAvoid : Vars n -> Name -> Grammar False ()
+checkAvoid avoid (MakeName [<] (UN y)) = checkAvoid1 avoid y
+checkAvoid _     _                     = pure ()
+
+export
+bound : (what : String) -> (bound : Vars k) -> (avoid : Vars n) ->
+        Grammar True (Var k)
+bound what vs avoid = do
+  x <- terminal "bound \{what} variable" $ \case Name x => Just x; _ => Nothing
+  checkAvoid1 avoid x
+  maybe (fail "not in scope: \{x}") pure $ find1 vs x
+
+export
+sname : Grammar True String
+sname = terminal "simple name" $ \case Name x => pure x; _ => Nothing
+
+export
+qname : Grammar True Name
+qname = do
+  parts <- sepBy1 (punc Dot) sname
+  pure $ MakeName {mods = cast $ init parts, base = UN $ last parts}
+
+export
+nameWith : (bound : Vars k) -> (avoid : Vars n) ->
+           Grammar True (Either (Var k) Name)
+nameWith bound avoid = do
+  y <- qname
+  checkAvoid avoid y
+  pure $ maybe (Right y) Left $ find bound y
+
+
+export
+dimension : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Dim d)
+dimension dvars tvars =
+      K Zero <$  zero
+  <|> K One  <$  one
+  <|> B      <$> bound "dimension" {bound = dvars, avoid = tvars}
+
+
+mutual
+  export
+  term : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Term d n)
+  term dvars tvars =
+        E <$> squares (elim {dvars, tvars})
+    <|> TYPE . U <$> universe
+
+  export
+  elim : (dvars : Vars d) -> (tvars : Vars n) -> Grammar True (Elim d n)
+  elim dvars tvars =
+        either B F <$> nameWith {bound = tvars, avoid = dvars}
--- a/lib/on-hold/Quox/Token.idr
+++ b/lib/on-hold/Quox/Token.idr
@ -0,0 +1,49 @@
+module Quox.Token
+
+import Generics.Derive
+import Text.Lexer
+
+%default total
+%language ElabReflection
+
+
+public export
+data Punc
+= LParen  | RParen
+| LSquare | RSquare
+| LBrace  | RBrace
+| Comma
+| Colon | DblColon
+| Dot
+| Arrow | DblArrow
+| Times | Triangle
+| Wild
+%runElab derive "Punc" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+public export
+data Keyword
+= Lam | Let | In | Case | Of | Omega
+| Pi | Sigma | W
+%runElab derive "Keyword" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+||| zero and one are separate because they are
+||| quantity & dimension constants
+public export
+data Number = Zero | One | Other Nat
+%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+public export
+data Token
+= P Punc
+| K Keyword
+| Name String | Symbol String
+| N Number | TYPE Nat
+%runElab derive "Token" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+public export
+BToken : Type
+BToken = WithBounds Token
--- a/lib/on-hold/Quox/Unicode.idr
+++ b/lib/on-hold/Quox/Unicode.idr
@ -0,0 +1,140 @@
+module Quox.Unicode
+
+import Generics.Derive
+
+
+%default total
+%language ElabReflection
+
+
+namespace Letter
+  public export
+  data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other
+  %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Mark
+  public export
+  data Mark = NonSpacing | SpacingCombining | Enclosing
+  %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Number
+  public export
+  data Number = Decimal | Letter | Other
+  %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Punctuation
+  public export
+  data Punctuation = Connector | Dash | Open | Close
+                   | InitialQuote | FinalQuote | Other
+  %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Symbol
+  public export
+  data Symbol = Math | Currency | Modifier | Other
+  %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Separator
+  public export
+  data Separator = Space | Line | Paragraph
+  %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+namespace Other
+  public export
+  data Other = Control | Format | Surrogate | PrivateUse | NotAssigned
+  %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+public export
+data GeneralCategory
+= Letter      Letter
+| Mark        Mark
+| Number      Number
+| Punctuation Punctuation
+| Symbol      Symbol
+| Separator   Separator
+| Other       Other
+%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show]
+
+
+private
+%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
+prim__genCat : Char -> String
+
+export
+genCat : Char -> GeneralCategory
+genCat ch = assert_total $
+  case prim__genCat ch of
+       "Lu" => Letter      Uppercase
+       "Ll" => Letter      Lowercase
+       "Lt" => Letter      Titlecase
+       "Lm" => Letter      Modifier
+       "Lo" => Letter      Other
+       "Mn" => Mark        NonSpacing
+       "Mc" => Mark        SpacingCombining
+       "Me" => Mark        Enclosing
+       "Nd" => Number      Decimal
+       "Nl" => Number      Letter
+       "No" => Number      Other
+       "Pc" => Punctuation Connector
+       "Pd" => Punctuation Dash
+       "Ps" => Punctuation Open
+       "Pe" => Punctuation Close
+       "Pi" => Punctuation InitialQuote
+       "Pf" => Punctuation FinalQuote
+       "Po" => Punctuation Other
+       "Sm" => Symbol      Math
+       "Sc" => Symbol      Currency
+       "Sk" => Symbol      Modifier
+       "So" => Symbol      Other
+       "Zs" => Separator   Space
+       "Zl" => Separator   Line
+       "Zp" => Separator   Paragraph
+       "Cc" => Other       Control
+       "Cf" => Other       Format
+       "Cs" => Other       Surrogate
+       "Co" => Other       PrivateUse
+       "Cn" => Other       NotAssigned
+
+
+export
+isIdStart : Char -> Bool
+isIdStart ch =
+  case genCat ch of
+       Letter _ => True
+       Number _ => not ('0' <= ch && ch <= '9')
+       _        => False
+
+export
+isIdCont : Char -> Bool
+isIdCont ch =
+  isIdStart ch || ch == '\'' ||
+  case genCat ch of
+       Mark   _ => True
+       Number _ => True
+       _        => False
+
+export
+isIdConnector : Char -> Bool
+isIdConnector ch =
+  case genCat ch of Punctuation Connector => True; _ => False
+
+
+export
+isSymChar : Char -> Bool
+isSymChar ch =
+  case genCat ch of
+       Symbol      _     => True
+       Punctuation Dash  => True
+       Punctuation Other => True
+       _                 => False
+
+export
+isWhitespace : Char -> Bool
+isWhitespace ch =
+  ch == '\t' || ch == '\r' || ch == '\n' ||
+  case genCat ch of Separator _ => True; _ => False
+
+
+export
+%foreign "scheme:string-normalize-nfc"
+normalizeNfc : String -> String