detect reserved words inside names like 'a.λ.b'
This commit is contained in:
parent
d4de74eab6
commit
f04c4619ef
3 changed files with 95 additions and 44 deletions
|
@ -39,12 +39,14 @@ data Error =
|
||||||
export
|
export
|
||||||
prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts)
|
prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts)
|
||||||
prettyLexError file (Err reason line col char) = do
|
prettyLexError file (Err reason line col char) = do
|
||||||
let loc = makeLoc file (MkBounds line col line col)
|
|
||||||
reason <- case reason of
|
reason <- case reason of
|
||||||
EndInput => pure "unexpected end of input"
|
Other msg => pure $ text msg
|
||||||
NoRuleApply => pure $ text "unrecognised character: \{show char}"
|
NoRuleApply => case char of
|
||||||
|
Just char => pure $ text "unrecognised character: \{show char}"
|
||||||
|
Nothing => pure $ text "unexpected end of input"
|
||||||
ComposeNotClosing (sl, sc) (el, ec) => pure $
|
ComposeNotClosing (sl, sc) (el, ec) => pure $
|
||||||
hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))]
|
hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))]
|
||||||
|
let loc = makeLoc file (MkBounds line col line col)
|
||||||
pure $ vappend !(prettyLoc loc) reason
|
pure $ vappend !(prettyLoc loc) reason
|
||||||
|
|
||||||
export
|
export
|
||||||
|
|
|
@ -34,16 +34,27 @@ data Token =
|
||||||
| Sup Nat
|
| Sup Nat
|
||||||
%runElab derive "Token" [Eq, Ord, Show]
|
%runElab derive "Token" [Eq, Ord, Show]
|
||||||
|
|
||||||
-- token or whitespace
|
||| token or whitespace
|
||||||
|
||| @ Skip whitespace, comments, etc
|
||||||
|
||| @ Invalid a token which failed a post-lexer check
|
||||||
|
||| (e.g. a qualified name containing a keyword)
|
||||||
|
||| @ T a well formed token
|
||||||
public export
|
public export
|
||||||
0 TokenW : Type
|
data ExtToken = Skip | Invalid String String | T Token
|
||||||
TokenW = Maybe Token
|
%runElab derive "ExtToken" [Eq, Ord, Show]
|
||||||
|
|
||||||
|
|
||||||
|
public export
|
||||||
|
data ErrorReason =
|
||||||
|
NoRuleApply
|
||||||
|
| ComposeNotClosing (Int, Int) (Int, Int)
|
||||||
|
| Other String
|
||||||
|
%runElab derive "ErrorReason" [Eq, Ord, Show]
|
||||||
|
|
||||||
public export
|
public export
|
||||||
record Error where
|
record Error where
|
||||||
constructor Err
|
constructor Err
|
||||||
reason : StopReason
|
reason : ErrorReason
|
||||||
line, col : Int
|
line, col : Int
|
||||||
||| `Nothing` if the error is at the end of the input
|
||| `Nothing` if the error is at the end of the input
|
||||||
char : Maybe Char
|
char : Maybe Char
|
||||||
|
@ -52,19 +63,14 @@ record Error where
|
||||||
|
|
||||||
|
|
||||||
private
|
private
|
||||||
skip : Lexer -> Tokenizer TokenW
|
skip : Lexer -> Tokenizer ExtToken
|
||||||
skip t = match t $ const Nothing
|
skip t = match t $ const Skip
|
||||||
|
|
||||||
private
|
private
|
||||||
match : Lexer -> (String -> Token) -> Tokenizer TokenW
|
tmatch : Lexer -> (String -> Token) -> Tokenizer ExtToken
|
||||||
match t f = Tokenizer.match t (Just . f)
|
tmatch t f = match t (T . f)
|
||||||
%hide Tokenizer.match
|
|
||||||
|
|
||||||
|
|
||||||
private
|
|
||||||
name : Tokenizer TokenW
|
|
||||||
name = match name $ Name . fromListP . split (== '.') . normalizeNfc
|
|
||||||
|
|
||||||
||| [todo] escapes other than `\"` and (accidentally) `\\`
|
||| [todo] escapes other than `\"` and (accidentally) `\\`
|
||||||
export
|
export
|
||||||
fromStringLit : String -> String
|
fromStringLit : String -> String
|
||||||
|
@ -76,17 +82,17 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
|
||||||
go (c :: cs) = c :: go cs
|
go (c :: cs) = c :: go cs
|
||||||
|
|
||||||
private
|
private
|
||||||
string : Tokenizer TokenW
|
string : Tokenizer ExtToken
|
||||||
string = match stringLit (Str . fromStringLit)
|
string = tmatch stringLit (Str . fromStringLit)
|
||||||
|
|
||||||
private
|
private
|
||||||
nat : Tokenizer TokenW
|
nat : Tokenizer ExtToken
|
||||||
nat = match (some (range '0' '9')) (Nat . cast)
|
nat = tmatch (some (range '0' '9')) (Nat . cast)
|
||||||
|
|
||||||
private
|
private
|
||||||
tag : Tokenizer TokenW
|
tag : Tokenizer ExtToken
|
||||||
tag = match (is '\'' <+> name) (Tag . drop 1)
|
tag = tmatch (is '\'' <+> name) (Tag . drop 1)
|
||||||
<|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
|
<|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,17 +118,17 @@ supToNat = cast . pack . map fromSup . unpack
|
||||||
|
|
||||||
-- ★0, Type0. base ★/Type is a Reserved
|
-- ★0, Type0. base ★/Type is a Reserved
|
||||||
private
|
private
|
||||||
universe : Tokenizer TokenW
|
universe : Tokenizer ExtToken
|
||||||
universe = universeWith "★" <|> universeWith "Type" where
|
universe = universeWith "★" <|> universeWith "Type" where
|
||||||
universeWith : String -> Tokenizer TokenW
|
universeWith : String -> Tokenizer ExtToken
|
||||||
universeWith pfx =
|
universeWith pfx =
|
||||||
let len = length pfx in
|
let len = length pfx in
|
||||||
match (exact pfx <+> digits) (TYPE . cast . drop len)
|
tmatch (exact pfx <+> digits) (TYPE . cast . drop len)
|
||||||
|
|
||||||
private
|
private
|
||||||
sup : Tokenizer TokenW
|
sup : Tokenizer ExtToken
|
||||||
sup = match (some $ pred isSupDigit) (Sup . supToNat)
|
sup = tmatch (some $ pred isSupDigit) (Sup . supToNat)
|
||||||
<|> match (is '^' <+> digits) (Sup . cast . drop 1)
|
<|> tmatch (is '^' <+> digits) (Sup . cast . drop 1)
|
||||||
|
|
||||||
|
|
||||||
private %inline
|
private %inline
|
||||||
|
@ -165,17 +171,23 @@ resString : Reserved -> String
|
||||||
resString (Only r) = resString1 r
|
resString (Only r) = resString1 r
|
||||||
resString (r `Or` _) = resString1 r
|
resString (r `Or` _) = resString1 r
|
||||||
|
|
||||||
|
||| return both representative strings for a token description
|
||||||
|
public export
|
||||||
|
resString2 : Reserved -> List String
|
||||||
|
resString2 (Only r) = [resString1 r]
|
||||||
|
resString2 (r `Or` s) = [resString1 r, resString1 s]
|
||||||
|
|
||||||
private
|
private
|
||||||
resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW
|
resTokenizer1 : Reserved1 -> String -> Tokenizer ExtToken
|
||||||
resTokenizer1 r str =
|
resTokenizer1 r str =
|
||||||
let res : String -> Token := const $ Reserved str in
|
let res : String -> Token := const $ Reserved str in
|
||||||
case r of Word w => match (exact w <+> reject idContEnd) res
|
case r of Word w => tmatch (exact w <+> reject idContEnd) res
|
||||||
Sym s => match (exact s <+> reject symCont) res
|
Sym s => tmatch (exact s <+> reject symCont) res
|
||||||
Punc x => match (exact x) res
|
Punc x => tmatch (exact x) res
|
||||||
|
|
||||||
||| match a reserved token
|
||| match a reserved token
|
||||||
export
|
export
|
||||||
resTokenizer : Reserved -> Tokenizer TokenW
|
resTokenizer : Reserved -> Tokenizer ExtToken
|
||||||
resTokenizer (Only r) = resTokenizer1 r (resString1 r)
|
resTokenizer (Only r) = resTokenizer1 r (resString1 r)
|
||||||
resTokenizer (r `Or` s) =
|
resTokenizer (r `Or` s) =
|
||||||
resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
|
resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
|
||||||
|
@ -219,14 +231,31 @@ reserved =
|
||||||
Word1 "load",
|
Word1 "load",
|
||||||
Word1 "namespace"]
|
Word1 "namespace"]
|
||||||
|
|
||||||
|
public export
|
||||||
|
reservedStrings : List String
|
||||||
|
reservedStrings = map resString reserved
|
||||||
|
|
||||||
|
public export
|
||||||
|
allReservedStrings : List String
|
||||||
|
allReservedStrings = foldMap resString2 reserved
|
||||||
|
|
||||||
||| `IsReserved str` is true if `Reserved str` might actually show up in
|
||| `IsReserved str` is true if `Reserved str` might actually show up in
|
||||||
||| the token stream
|
||| the token stream
|
||||||
public export
|
public export
|
||||||
IsReserved : String -> Type
|
IsReserved : String -> Type
|
||||||
IsReserved str = str `Elem` map resString reserved
|
IsReserved str = So (str `elem` allReservedStrings)
|
||||||
|
|
||||||
|
private
|
||||||
|
name : Tokenizer ExtToken
|
||||||
|
name =
|
||||||
|
match name $ \str =>
|
||||||
|
let parts = split (== '.') $ normalizeNfc str in
|
||||||
|
case find (`elem` allReservedStrings) (toList parts) of
|
||||||
|
Nothing => T $ Name $ fromListP parts
|
||||||
|
Just w => Invalid "reserved word '\{w}' inside name \{str}" str
|
||||||
|
|
||||||
export
|
export
|
||||||
tokens : Tokenizer TokenW
|
tokens : Tokenizer ExtToken
|
||||||
tokens = choice $
|
tokens = choice $
|
||||||
map skip [pred isWhitespace,
|
map skip [pred isWhitespace,
|
||||||
lineComment (exact "--" <+> reject symCont),
|
lineComment (exact "--" <+> reject symCont),
|
||||||
|
@ -235,10 +264,24 @@ tokens = choice $
|
||||||
map resTokenizer reserved <+>
|
map resTokenizer reserved <+>
|
||||||
[sup, nat, string, tag, name]
|
[sup, nat, string, tag, name]
|
||||||
|
|
||||||
|
export
|
||||||
|
check : Alternative f =>
|
||||||
|
WithBounds ExtToken -> Either Error (f (WithBounds Token))
|
||||||
|
check (MkBounded val irr bounds@(MkBounds line col _ _)) = case val of
|
||||||
|
Skip => Right empty
|
||||||
|
T tok => Right $ pure $ MkBounded tok irr bounds
|
||||||
|
Invalid msg tok => Left $ Err (Other msg) line col (index 0 tok)
|
||||||
|
|
||||||
|
export
|
||||||
|
toErrorReason : StopReason -> Maybe ErrorReason
|
||||||
|
toErrorReason EndInput = Nothing
|
||||||
|
toErrorReason NoRuleApply = Just NoRuleApply
|
||||||
|
toErrorReason (ComposeNotClosing s e) = Just $ ComposeNotClosing s e
|
||||||
|
|
||||||
export
|
export
|
||||||
lex : String -> Either Error (List (WithBounds Token))
|
lex : String -> Either Error (List (WithBounds Token))
|
||||||
lex str =
|
lex str =
|
||||||
let (res, reason, line, col, str) = lex tokens str in
|
let (res, reason, line, col, str) = lex tokens str in
|
||||||
case reason of
|
case toErrorReason reason of
|
||||||
EndInput => Right $ mapMaybe sequence res
|
Nothing => concatMap check res @{MonoidApplicative}
|
||||||
_ => Left $ Err {reason, line, col, char = index 0 str}
|
Just e => Left $ Err {reason = e, line, col, char = index 0 str}
|
||||||
|
|
|
@ -47,7 +47,12 @@ tests = "lexer" :- [
|
||||||
lexes " " [],
|
lexes " " [],
|
||||||
lexes "-- line comment" [],
|
lexes "-- line comment" [],
|
||||||
lexes "name -- line comment" [Name "name"],
|
lexes "name -- line comment" [Name "name"],
|
||||||
lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"],
|
lexes
|
||||||
|
"""
|
||||||
|
-- line comment
|
||||||
|
nameBetween -- and another
|
||||||
|
"""
|
||||||
|
[Name "nameBetween"],
|
||||||
lexes "{- block comment -}" [],
|
lexes "{- block comment -}" [],
|
||||||
lexes "{- {- nested -} block comment -}" []
|
lexes "{- {- nested -} block comment -}" []
|
||||||
],
|
],
|
||||||
|
@ -70,13 +75,14 @@ tests = "lexer" :- [
|
||||||
lexes "normalïse" [Name "normalïse"],
|
lexes "normalïse" [Name "normalïse"],
|
||||||
-- ↑ replace i + combining ¨ with precomposed ï
|
-- ↑ replace i + combining ¨ with precomposed ï
|
||||||
lexes "map#" [Name "map#"],
|
lexes "map#" [Name "map#"],
|
||||||
|
lexes "map#[" [Name "map#", Reserved "["], -- don't actually do this
|
||||||
|
lexes "map #[" [Name "map", Reserved "#["],
|
||||||
lexes "write!" [Name "write!"],
|
lexes "write!" [Name "write!"],
|
||||||
lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
|
lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
|
||||||
|
|
||||||
todo "check for reserved words in a qname",
|
lexFail "abc.fun.ghi",
|
||||||
skip $
|
lexFail "abc.λ.ghi",
|
||||||
lexes "abc.fun.def"
|
lexFail "abc.ω.ghi",
|
||||||
[Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
|
|
||||||
|
|
||||||
lexes "+" [Name "+"],
|
lexes "+" [Name "+"],
|
||||||
lexes "*" [Name "*"],
|
lexes "*" [Name "*"],
|
||||||
|
|
Loading…
Reference in a new issue