From be944226687b89acf4216250a9a2113e41509ae4 Mon Sep 17 00:00:00 2001
From: rhiannon morris
Date: Thu, 16 Mar 2023 18:34:49 +0100
Subject: [PATCH] move name lexing stuff to Quox.Name

---
 lib/Quox/Name.idr         | 33 +++++++++++++++++++++++++++++++++
 lib/Quox/Parser/Lexer.idr | 34 ++--------------------------------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/lib/Quox/Name.idr b/lib/Quox/Name.idr
index 20d16d8..757f612 100644
--- a/lib/Quox/Name.idr
+++ b/lib/Quox/Name.idr
@@ -3,6 +3,8 @@ module Quox.Name
 import public Data.SnocList
 import Data.List
 import Derive.Prelude
+import Quox.CharExtra
+import Text.Lexer
 
 %hide TT.Name
 
@@ -95,3 +97,34 @@ fromListP (x ::: xs) = go [<] x xs where
 export %inline
 fromList : List1 String -> Name
 fromList = fromPName . fromListP
+
+
+export
+syntaxChars : List Char
+syntaxChars = ['(', ')', '[', ']', '{', '}', '"', '\'', ',', '.', ';']
+
+export
+isSymStart, isSymCont : Char -> Bool
+isSymStart c = not (c `elem` syntaxChars) && isSymChar c
+isSymCont c = c == '\'' || isSymStart c
+
+export
+idStart, idCont, idEnd, idContEnd : Lexer
+idStart = pred isIdStart
+idCont = pred isIdCont
+idEnd = pred $ \c => c `elem` unpack "?!#"
+idContEnd = idCont <|> idEnd
+
+export
+symStart, symCont : Lexer
+symStart = pred isSymStart
+symCont = pred isSymCont
+
+export
+baseName : Lexer
+baseName = idStart <+> many idCont <+> many idEnd
+       <|> symStart <+> many symCont
+
+export
+name : Lexer
+name = baseName <+> many (is '.' <+> baseName)
diff --git a/lib/Quox/Parser/Lexer.idr b/lib/Quox/Parser/Lexer.idr
index 1353b3b..873a549 100644
--- a/lib/Quox/Parser/Lexer.idr
+++ b/lib/Quox/Parser/Lexer.idr
@@ -59,39 +59,9 @@ match t f = Tokenizer.match t (Just . f)
 %hide Tokenizer.match
 
 
-export %inline
-syntaxChars : List Char
-syntaxChars = ['(', ')', '[', ']', '{', '}', '"', '\'', ',', '.', ';']
-
-private
-isSymStart, isSymCont : Char -> Bool
-isSymStart c = not (c `elem` syntaxChars) && isSymChar c
-isSymCont c = c == '\'' || isSymStart c
-
-private
-idStart, idCont, idEnd, idContEnd : Lexer
-idStart = pred isIdStart
-idCont = pred isIdCont
-idEnd = pred $ \c => c `elem` unpack "?!#"
-idContEnd = idCont <|> idEnd
-
-private
-symStart, symCont : Lexer
-symStart = pred isSymStart
-symCont = pred isSymCont
-
-private
-baseNameL : Lexer
-baseNameL = idStart <+> many idCont <+> many idEnd
-        <|> symStart <+> many symCont
-
-private
-nameL : Lexer
-nameL = baseNameL <+> many (is '.' <+> baseNameL)
-
 private
 name : Tokenizer TokenW
-name = match nameL $ Name . fromListP . split (== '.') . normalizeNfc
+name = match name $ Name . fromListP . split (== '.') . normalizeNfc
 
 ||| [todo] escapes other than `\"` and (accidentally) `\\`
 export
@@ -113,7 +83,7 @@ nat = match (some (range '0' '9')) (Nat . cast)
 
 private
 tag : Tokenizer TokenW
-tag = match (is '\'' <+> nameL) (Tag . drop 1)
+tag = match (is '\'' <+> name) (Tag . drop 1)
   <|> match (is '\'' <+> stringLit) (Tag . fromStringLit . drop 1)
 
 