detect reserved words inside names like 'a.λ.b'

This commit is contained in:
rhiannon morris 2023-09-24 17:36:20 +02:00
parent d4de74eab6
commit f04c4619ef
3 changed files with 95 additions and 44 deletions

View file

@ -39,12 +39,14 @@ data Error =
export export
prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts) prettyLexError : {opts : _} -> String -> LexError -> Eff Pretty (Doc opts)
prettyLexError file (Err reason line col char) = do prettyLexError file (Err reason line col char) = do
let loc = makeLoc file (MkBounds line col line col)
reason <- case reason of reason <- case reason of
EndInput => pure "unexpected end of input" Other msg => pure $ text msg
NoRuleApply => pure $ text "unrecognised character: \{show char}" NoRuleApply => case char of
Just char => pure $ text "unrecognised character: \{show char}"
Nothing => pure $ text "unexpected end of input"
ComposeNotClosing (sl, sc) (el, ec) => pure $ ComposeNotClosing (sl, sc) (el, ec) => pure $
hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))] hsep ["unterminated token at", !(prettyBounds (MkBounds sl sc el ec))]
let loc = makeLoc file (MkBounds line col line col)
pure $ vappend !(prettyLoc loc) reason pure $ vappend !(prettyLoc loc) reason
export export

View file

@ -34,16 +34,27 @@ data Token =
| Sup Nat | Sup Nat
%runElab derive "Token" [Eq, Ord, Show] %runElab derive "Token" [Eq, Ord, Show]
-- token or whitespace ||| token or whitespace
||| @ Skip whitespace, comments, etc
||| @ Invalid a token which failed a post-lexer check
||| (e.g. a qualified name containing a keyword)
||| @ T a well formed token
public export public export
0 TokenW : Type data ExtToken = Skip | Invalid String String | T Token
TokenW = Maybe Token %runElab derive "ExtToken" [Eq, Ord, Show]
public export
data ErrorReason =
NoRuleApply
| ComposeNotClosing (Int, Int) (Int, Int)
| Other String
%runElab derive "ErrorReason" [Eq, Ord, Show]
public export public export
record Error where record Error where
constructor Err constructor Err
reason : StopReason reason : ErrorReason
line, col : Int line, col : Int
||| `Nothing` if the error is at the end of the input ||| `Nothing` if the error is at the end of the input
char : Maybe Char char : Maybe Char
@ -52,19 +63,14 @@ record Error where
private private
skip : Lexer -> Tokenizer TokenW skip : Lexer -> Tokenizer ExtToken
skip t = match t $ const Nothing skip t = match t $ const Skip
private private
match : Lexer -> (String -> Token) -> Tokenizer TokenW tmatch : Lexer -> (String -> Token) -> Tokenizer ExtToken
match t f = Tokenizer.match t (Just . f) tmatch t f = match t (T . f)
%hide Tokenizer.match
private
name : Tokenizer TokenW
name = match name $ Name . fromListP . split (== '.') . normalizeNfc
||| [todo] escapes other than `\"` and (accidentally) `\\` ||| [todo] escapes other than `\"` and (accidentally) `\\`
export export
fromStringLit : String -> String fromStringLit : String -> String
@ -76,17 +82,17 @@ fromStringLit = pack . go . unpack . drop 1 . dropLast 1 where
go (c :: cs) = c :: go cs go (c :: cs) = c :: go cs
private private
string : Tokenizer TokenW string : Tokenizer ExtToken
string = match stringLit (Str . fromStringLit) string = tmatch stringLit (Str . fromStringLit)
private private
nat : Tokenizer TokenW nat : Tokenizer ExtToken
nat = match (some (range '0' '9')) (Nat . cast) nat = tmatch (some (range '0' '9')) (Nat . cast)
private private
tag : Tokenizer TokenW tag : Tokenizer ExtToken
tag = match (is '\'' <+> name) (Tag . drop 1) tag = tmatch (is '\'' <+> name) (Tag . drop 1)
<|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1) <|> tmatch (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
@ -112,17 +118,17 @@ supToNat = cast . pack . map fromSup . unpack
-- ★0, Type0. base ★/Type is a Reserved -- ★0, Type0. base ★/Type is a Reserved
private private
universe : Tokenizer TokenW universe : Tokenizer ExtToken
universe = universeWith "" <|> universeWith "Type" where universe = universeWith "" <|> universeWith "Type" where
universeWith : String -> Tokenizer TokenW universeWith : String -> Tokenizer ExtToken
universeWith pfx = universeWith pfx =
let len = length pfx in let len = length pfx in
match (exact pfx <+> digits) (TYPE . cast . drop len) tmatch (exact pfx <+> digits) (TYPE . cast . drop len)
private private
sup : Tokenizer TokenW sup : Tokenizer ExtToken
sup = match (some $ pred isSupDigit) (Sup . supToNat) sup = tmatch (some $ pred isSupDigit) (Sup . supToNat)
<|> match (is '^' <+> digits) (Sup . cast . drop 1) <|> tmatch (is '^' <+> digits) (Sup . cast . drop 1)
private %inline private %inline
@ -165,17 +171,23 @@ resString : Reserved -> String
resString (Only r) = resString1 r resString (Only r) = resString1 r
resString (r `Or` _) = resString1 r resString (r `Or` _) = resString1 r
||| return both representative strings for a token description
public export
resString2 : Reserved -> List String
resString2 (Only r) = [resString1 r]
resString2 (r `Or` s) = [resString1 r, resString1 s]
private private
resTokenizer1 : Reserved1 -> String -> Tokenizer TokenW resTokenizer1 : Reserved1 -> String -> Tokenizer ExtToken
resTokenizer1 r str = resTokenizer1 r str =
let res : String -> Token := const $ Reserved str in let res : String -> Token := const $ Reserved str in
case r of Word w => match (exact w <+> reject idContEnd) res case r of Word w => tmatch (exact w <+> reject idContEnd) res
Sym s => match (exact s <+> reject symCont) res Sym s => tmatch (exact s <+> reject symCont) res
Punc x => match (exact x) res Punc x => tmatch (exact x) res
||| match a reserved token ||| match a reserved token
export export
resTokenizer : Reserved -> Tokenizer TokenW resTokenizer : Reserved -> Tokenizer ExtToken
resTokenizer (Only r) = resTokenizer1 r (resString1 r) resTokenizer (Only r) = resTokenizer1 r (resString1 r)
resTokenizer (r `Or` s) = resTokenizer (r `Or` s) =
resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r) resTokenizer1 r (resString1 r) <|> resTokenizer1 s (resString1 r)
@ -219,14 +231,31 @@ reserved =
Word1 "load", Word1 "load",
Word1 "namespace"] Word1 "namespace"]
public export
reservedStrings : List String
reservedStrings = map resString reserved
public export
allReservedStrings : List String
allReservedStrings = foldMap resString2 reserved
||| `IsReserved str` is true if `Reserved str` might actually show up in ||| `IsReserved str` is true if `Reserved str` might actually show up in
||| the token stream ||| the token stream
public export public export
IsReserved : String -> Type IsReserved : String -> Type
IsReserved str = str `Elem` map resString reserved IsReserved str = So (str `elem` allReservedStrings)
private
name : Tokenizer ExtToken
name =
match name $ \str =>
let parts = split (== '.') $ normalizeNfc str in
case find (`elem` allReservedStrings) (toList parts) of
Nothing => T $ Name $ fromListP parts
Just w => Invalid "reserved word '\{w}' inside name \{str}" str
export export
tokens : Tokenizer TokenW tokens : Tokenizer ExtToken
tokens = choice $ tokens = choice $
map skip [pred isWhitespace, map skip [pred isWhitespace,
lineComment (exact "--" <+> reject symCont), lineComment (exact "--" <+> reject symCont),
@ -235,10 +264,24 @@ tokens = choice $
map resTokenizer reserved <+> map resTokenizer reserved <+>
[sup, nat, string, tag, name] [sup, nat, string, tag, name]
export
check : Alternative f =>
WithBounds ExtToken -> Either Error (f (WithBounds Token))
check (MkBounded val irr bounds@(MkBounds line col _ _)) = case val of
Skip => Right empty
T tok => Right $ pure $ MkBounded tok irr bounds
Invalid msg tok => Left $ Err (Other msg) line col (index 0 tok)
export
toErrorReason : StopReason -> Maybe ErrorReason
toErrorReason EndInput = Nothing
toErrorReason NoRuleApply = Just NoRuleApply
toErrorReason (ComposeNotClosing s e) = Just $ ComposeNotClosing s e
export export
lex : String -> Either Error (List (WithBounds Token)) lex : String -> Either Error (List (WithBounds Token))
lex str = lex str =
let (res, reason, line, col, str) = lex tokens str in let (res, reason, line, col, str) = lex tokens str in
case reason of case toErrorReason reason of
EndInput => Right $ mapMaybe sequence res Nothing => concatMap check res @{MonoidApplicative}
_ => Left $ Err {reason, line, col, char = index 0 str} Just e => Left $ Err {reason = e, line, col, char = index 0 str}

View file

@ -47,7 +47,12 @@ tests = "lexer" :- [
lexes " " [], lexes " " [],
lexes "-- line comment" [], lexes "-- line comment" [],
lexes "name -- line comment" [Name "name"], lexes "name -- line comment" [Name "name"],
lexes "-- line comment\nnameBetween -- and another" [Name "nameBetween"], lexes
"""
-- line comment
nameBetween -- and another
"""
[Name "nameBetween"],
lexes "{- block comment -}" [], lexes "{- block comment -}" [],
lexes "{- {- nested -} block comment -}" [] lexes "{- {- nested -} block comment -}" []
], ],
@ -70,13 +75,14 @@ tests = "lexer" :- [
lexes "normalïse" [Name "normalïse"], lexes "normalïse" [Name "normalïse"],
-- ↑ replace i + combining ¨ with precomposed ï -- ↑ replace i + combining ¨ with precomposed ï
lexes "map#" [Name "map#"], lexes "map#" [Name "map#"],
lexes "map#[" [Name "map#", Reserved "["], -- don't actually do this
lexes "map #[" [Name "map", Reserved "#["],
lexes "write!" [Name "write!"], lexes "write!" [Name "write!"],
lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"], lexes "uhh??!?!?!?" [Name "uhh??!?!?!?"],
todo "check for reserved words in a qname", lexFail "abc.fun.ghi",
skip $ lexFail "abc.λ.ghi",
lexes "abc.fun.def" lexFail "abc.ω.ghi",
[Name "abc", Reserved ".", Reserved "λ", Reserved ".", Name "def"],
lexes "+" [Name "+"], lexes "+" [Name "+"],
lexes "*" [Name "*"], lexes "*" [Name "*"],