detect reserved words inside names like 'a.λ.b'
parent d4de74eab6
commit f04c4619ef

3 changed files with 95 additions and 44 deletions
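
The idea of the change: instead of lexing 'a.λ.b' as a qualified name, the lexer now splits the name on '.' and rejects it when any part is a reserved word. A rough standalone sketch of that check, with a hypothetical reservedWords list standing in for the allReservedStrings defined in the diff below (the reserved spellings shown are taken from the tests):

  import Data.List
  import Data.List1
  import Data.String

  -- hypothetical stand-in for allReservedStrings below
  reservedWords : List String
  reservedWords = ["λ", "fun", "ω"]

  -- split a dotted name and report the first reserved part, if any
  badPart : String -> Maybe String
  badPart str = find (`elem` reservedWords) $ forget $ split (== '.') str

  -- badPart "abc.def.ghi" == Nothing
  -- badPart "abc.λ.ghi"   == Just "λ"
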
@@ -39,12 +39,14 @@ data Error =
 export
 prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts)
 prettyLexError file (Err reason line col char) = do
-  let loc = makeLoc file (MkBounds line col line col)
   reason <- case reason of
-    EndInput => pure "unexpected end of input"
-    NoRuleApply => pure $ text "unrecognised character: \{show char}"
+    Other msg => pure $ text msg
+    NoRuleApply => case char of
+      Just char => pure $ text "unrecognised character: \{show char}"
+      Nothing => pure $ text "unexpected end of input"
     ComposeNotClosing (sl, sc) (el, ec) => pure $
       hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))]
+  let loc = makeLoc file (MkBounds line col line col)
   pure $ vappend !(prettyLoc loc) reason
 
 export

@@ -34,16 +34,27 @@ data Token =
   | Sup Nat
 %runElab derive "Token" [Eq, Ord, Show]
 
--- token or whitespace
+||| token or whitespace
+||| @ Skip whitespace, comments, etc
+||| @ Invalid a token which failed a post-lexer check
+|||   (e.g. a qualified name containing a keyword)
+||| @ T a well formed token
 public export
-0 TokenW : Type
-TokenW = Maybe Token
+data ExtToken = Skip | Invalid String String | T Token
+%runElab derive "ExtToken" [Eq, Ord, Show]
 
 
+public export
+data ErrorReason =
+  NoRuleApply
+  | ComposeNotClosing (Int, Int) (Int, Int)
+  | Other String
+%runElab derive "ErrorReason" [Eq, Ord, Show]
 
 public export
 record Error where
   constructor Err
-  reason : StopReason
+  reason : ErrorReason
   line, col : Int
+  ||| `Nothing` if the error is at the end of the input
   char : Maybe Char

@@ -52,19 +63,14 @@ record Error where
 
 
 private
-skip : Lexer -> Tokenizer TokenW
-skip t = match t $ const Nothing
+skip : Lexer -> Tokenizer ExtToken
+skip t = match t $ const Skip
 
-private
-match : Lexer -> (String -> Token) -> Tokenizer TokenW
-match t f = Tokenizer.match t (Just . f)
 %hide Tokenizer.match
+tmatch : Lexer -> (String -> Token) -> Tokenizer ExtToken
+tmatch t f = match t (T . f)
 
 
-private
-name : Tokenizer TokenW
-name = match name $ Name . fromListP . split (== '.') . normalizeNfc
-
 ||| [todo] escapes other than `\"` and (accidentally) `\\`
 export
 fromStringLit : String -> String

@@ -76,17 +82,17 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
   go (c :: cs) = c :: go cs
 
 private
-string : Tokenizer TokenW
-string = match stringLit (Str . fromStringLit)
+string : Tokenizer ExtToken
+string = tmatch stringLit (Str . fromStringLit)
 
 private
-nat : Tokenizer TokenW
-nat = match (some (range '0' '9')) (Nat . cast)
+nat : Tokenizer ExtToken
+nat = tmatch (some (range '0' '9')) (Nat . cast)
 
 private
-tag : Tokenizer TokenW
-tag = match (is '\'' <+> name) (Tag . drop 1)
-  <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
+tag : Tokenizer ExtToken
+tag = tmatch (is '\'' <+> name) (Tag . drop 1)
+  <|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
 
 
 

@@ -112,17 +118,17 @@ supToNat = cast . pack . map fromSup . unpack
 
 -- ★0, Type0. base ★/Type is a Reserved
 private
-universe : Tokenizer TokenW
+universe : Tokenizer ExtToken
 universe = universeWith "★" <|> universeWith "Type" where
-  universeWith : String -> Tokenizer TokenW
+  universeWith : String -> Tokenizer ExtToken
   universeWith pfx =
     let len = length pfx in
-    match (exact pfx <+> digits) (TYPE . cast . drop len)
+    tmatch (exact pfx <+> digits) (TYPE . cast . drop len)
 
 private
-sup : Tokenizer TokenW
-sup = match (some $ pred isSupDigit) (Sup . supToNat)
-  <|> match (is '^' <+> digits) (Sup . cast . drop 1)
+sup : Tokenizer ExtToken
+sup = tmatch (some $ pred isSupDigit) (Sup . supToNat)
+  <|> tmatch (is '^' <+> digits) (Sup . cast . drop 1)
 
 
 private %inline

@@ -165,17 +171,23 @@ resString : Reserved -> String
 resString (Only r) = resString1 r
 resString (r `Or` _) = resString1 r
 
+||| return both representative strings for a token description
+public export
+resString2 : Reserved -> List String
+resString2 (Only r) = [resString1 r]
+resString2 (r `Or` s) = [resString1 r, resString1 s]
+
 private
-resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW
+resTokenizer1 : Reserved1 -> String -> Tokenizer ExtToken
 resTokenizer1 r str =
   let res : String -> Token := const $ Reserved str in
-  case r of Word w => match (exact w <+> reject idContEnd) res
-            Sym s => match (exact s <+> reject symCont) res
-            Punc x => match (exact x) res
+  case r of Word w => tmatch (exact w <+> reject idContEnd) res
+            Sym s => tmatch (exact s <+> reject symCont) res
+            Punc x => tmatch (exact x) res
 
 ||| match a reserved token
 export
-resTokenizer : Reserved -> Tokenizer TokenW
+resTokenizer : Reserved -> Tokenizer ExtToken
 resTokenizer (Only r) = resTokenizer1 r (resString1 r)
 resTokenizer (r `Or` s) =
   resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)

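Why resString2 exists: a Reserved token can have two spellings (the tests below treat "fun" and "λ" as the same reserved token), and the qualified-name check has to reject either spelling, not just the representative one used for display. A simplified model of the two functions, with plain strings standing in for Reserved1:

  -- simplified stand-ins for the real Reserved1/Reserved types
  data Res = Only String | Or String String

  repr : Res -> String             -- one representative, like resString
  repr (Only r)   = r
  repr (r `Or` _) = r

  spellings : Res -> List String   -- every spelling, like resString2
  spellings (Only r)   = [r]
  spellings (r `Or` s) = [r, s]

  -- foldMap spellings [Only ".", "λ" `Or` "fun"] == [".", "λ", "fun"]
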
@@ -219,14 +231,31 @@ reserved =
    Word1 "load",
    Word1 "namespace"]
 
+public export
+reservedStrings : List String
+reservedStrings = map resString reserved
+
+public export
+allReservedStrings : List String
+allReservedStrings = foldMap resString2 reserved
+
 ||| `IsReserved str` is true if `Reserved str` might actually show up in
 ||| the token stream
 public export
 IsReserved : String -> Type
-IsReserved str = str `Elem` map resString reserved
+IsReserved str = So (str `elem` allReservedStrings)
 
+private
+name : Tokenizer ExtToken
+name =
+  match name $ \str =>
+    let parts = split (== '.') $ normalizeNfc str in
+    case find (`elem` allReservedStrings) (toList parts) of
+      Nothing => T $ Name $ fromListP parts
+      Just w => Invalid "reserved word '\{w}' inside name \{str}" str
+
 export
-tokens : Tokenizer TokenW
+tokens : Tokenizer ExtToken
 tokens = choice $
   map skip [pred isWhitespace,
             lineComment (exact "--" <+> reject symCont),

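The new IsReserved turns the old list-membership proof into a boolean test wrapped in So: So b is only inhabited (by Oh) when b evaluates to True, so for a literal string the elem call reduces during type checking and the obligation is discharged by Oh. A minimal standalone illustration with a hypothetical keyword list:

  import Data.So

  keywords : List String
  keywords = ["fun", "load", "namespace"]   -- hypothetical subset

  IsKeyword : String -> Type
  IsKeyword str = So (str `elem` keywords)

  -- "load" `elem` keywords reduces to True, so Oh : So True type checks
  loadOk : IsKeyword "load"
  loadOk = Oh
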
@@ -235,10 +264,24 @@ tokens = choice $
   map resTokenizer reserved <+>
   [sup, nat, string, tag, name]
 
+export
+check : Alternative f =>
+        WithBounds ExtToken -> Either Error (f (WithBounds Token))
+check (MkBounded val irr bounds@(MkBounds line col _ _)) = case val of
+  Skip => Right empty
+  T tok => Right $ pure $ MkBounded tok irr bounds
+  Invalid msg tok => Left $ Err (Other msg) line col (index 0 tok)
+
+export
+toErrorReason : StopReason -> Maybe ErrorReason
+toErrorReason EndInput = Nothing
+toErrorReason NoRuleApply = Just NoRuleApply
+toErrorReason (ComposeNotClosing s e) = Just $ ComposeNotClosing s e
+
 export
 lex : String -> Either Error (List (WithBounds Token))
 lex str =
   let (res, reason, line, col, str) = lex tokens str in
-  case reason of
-    EndInput => Right $ mapMaybe sequence res
-    _ => Left $ Err {reason, line, col, char = index 0 str}
+  case toErrorReason reason of
+    Nothing => concatMap check res @{MonoidApplicative}
+    Just e => Left $ Err {reason = e, line, col, char = index 0 str}

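The shape of the new pipeline: check classifies each ExtToken (Skip vanishes, T survives, Invalid aborts with an error), and lex concatenates the per-token results, stopping at the first Invalid. The real code expresses this with concatMap check res @{MonoidApplicative}; a self-contained sketch of the same first-error-or-concatenation behaviour with simplified types, using traverse plus concat instead:

  -- simplified stand-ins for ExtToken / Token / Error
  data ExtTok = Skip | Invalid String | T String

  checkOne : ExtTok -> Either String (List String)
  checkOne Skip          = Right []     -- whitespace, comments: dropped
  checkOne (Invalid msg) = Left msg     -- e.g. a reserved word inside a name
  checkOne (T tok)       = Right [tok]  -- well formed token: kept

  checkAll : List ExtTok -> Either String (List String)
  checkAll = map concat . traverse checkOne

  -- checkAll [T "abc", Skip, T "."]         == Right ["abc", "."]
  -- checkAll [T "abc", Invalid "bad", Skip] == Left "bad"
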
@@ -47,7 +47,12 @@ tests = "lexer" :- [
     lexes " " [],
     lexes "-- line comment" [],
     lexes "name -- line comment" [Name "name"],
-    lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"],
+    lexes
+      """
+      -- line comment
+      nameBetween -- and another
+      """
+      [Name "nameBetween"],
     lexes "{- block comment -}" [],
     lexes "{- {- nested -} block comment -}" []
   ],

@@ -70,13 +75,14 @@ tests = "lexer" :- [
     lexes "normalïse" [Name "normalïse"],
     -- ↑ replace i + combining ¨ with precomposed ï
     lexes "map#" [Name "map#"],
     lexes "map#[" [Name "map#", Reserved "["], -- don't actually do this
     lexes "map #[" [Name "map", Reserved "#["],
     lexes "write!" [Name "write!"],
     lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
-    todo "check for reserved words in a qname",
-    skip $
-      lexes "abc.fun.def"
-        [Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
+
+    lexFail "abc.fun.ghi",
+    lexFail "abc.λ.ghi",
+    lexFail "abc.ω.ghi",
+
     lexes "+" [Name "+"],
     lexes "*" [Name "*"],