From 38ecabdda80bb60243117450cb4a949017881106 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Wed, 4 May 2022 03:11:37 +0200 Subject: [PATCH] numbers with bases --- src/Quox/Lexer.idr | 62 +++++++++++++++++++++++++-------------- tests/src/Tests/Lexer.idr | 29 +++++++++++++----- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/src/Quox/Lexer.idr b/src/Quox/Lexer.idr index f64617c..3b8d924 100644 --- a/src/Quox/Lexer.idr +++ b/src/Quox/Lexer.idr @@ -3,6 +3,7 @@ module Quox.Lexer import Quox.Error import Data.String +import Data.String.Extra import public Text.Lexer import public Text.Lexer.Tokenizer import Generics.Derive @@ -59,40 +60,55 @@ wild = exact "_" <+> reject nameCont %hide Text.Lexer.symbol symbol = exact "'" <+> name +number : Lexer -> Lexer +number char = char <+> many (opt (is '_') <+> char) <+> reject nameCont + +octal = approx "0o" <+> number octDigit +decimal = number digit +hexadecimal = approx "0x" <+> number hexDigit -decimal : Lexer -decimal = - digit <+> opt (many (digit <|> is '_') <+> digit) natToNumber : Nat -> Number natToNumber 0 = Zero natToNumber 1 = One natToNumber k = Other k -toDigit : Char -> Nat -toDigit c = cast $ ord c - ord '0' -makeDec' : Nat -> String -> Maybe Nat -makeDec' acc x with (asList x) - makeDec' acc "" | [] = pure acc - makeDec' acc (strCons '_' str) | '_' :: lst = makeDec' acc str | lst - makeDec' acc (strCons d str) | d :: lst = - if d >= '0' && d <= '9' then - makeDec' (acc * 10 + toDigit d) str | lst - else - Nothing +toHexit : Char -> Nat +toHexit c = cast $ + if '0' <= c && c <= '9' then + ord c - ord '0' + else if 'a' <= c && c <= 'f' then + ord c - ord 'a' + 10 + else if 'A' <= c && c <= 'F' then + ord c - ord 'A' + 10 + else 0 + +parameters (base : Nat) (single : Char -> Nat) + makeNat : Nat -> List Char -> Nat + makeNat acc [] = acc + makeNat acc ('_' :: lst) = makeNat acc lst + makeNat acc (d :: lst) = makeNat (acc * base + single d) lst + +makeOct = makeNat 8 toHexit 0 . unpack +makeDec = makeNat 10 toHexit 0 . unpack +makeHex = makeNat 16 toHexit 0 . unpack -makeDec = fromMaybe 0 . makeDec' 0 skip : Lexer -> Tokenizer (Maybe a) skip lex = match lex $ const Nothing simple : List String -> a -> Tokenizer (Maybe a) -simple str x = match (choice $ map exact str) $ const $ Just x +simple str = match (choice $ map exact str) . const . Just choice : (xs : List (Tokenizer a)) -> {auto 0 _ : So (isCons xs)} -> Tokenizer a choice (t :: ts) = foldl (\a, b => a <|> b) t ts +match : Lexer -> (String -> a) -> Tokenizer (Maybe a) +match lex f = Tokenizer.match lex (Just . f) +%hide Tokenizer.match + + tokens : Tokenizer (Maybe Token) tokens = choice [ skip $ lineComment $ exact "--", @@ -104,21 +120,23 @@ tokens = choice [ simple ["{"] $ P LBrace, simple ["}"] $ P RBrace, simple [","] $ P Comma, simple ["::", "∷"] $ P DblColon, - simple [":"] $ P Colon, + simple [":"] $ P Colon, -- needs to be after "::" simple ["->", "→"] $ P Arrow, simple ["=>", "⇒"] $ P DblArrow, simple ["**", "×"] $ P Times, simple ["<<", "⊲"] $ P Triangle, - match wild $ const $ Just $ P Wild, + match wild $ const $ P Wild, - match name $ Just . Name, - match symbol $ Just . Symbol . assert_total strTail, + match name $ Name, + match symbol $ Symbol . assert_total strTail, - -- [todo] other bases? - match (some $ digit <|> exact "_") $ Just . N . natToNumber . makeDec + match decimal $ N . natToNumber . makeDec, + match hexadecimal $ N . natToNumber . makeHex . drop 2, + match octal $ N . natToNumber . makeOct . drop 2 ] + export lex : MonadThrow Error m => String -> m (List (WithBounds Token)) lex str = diff --git a/tests/src/Tests/Lexer.idr b/tests/src/Tests/Lexer.idr index 4682612..7e05d8c 100644 --- a/tests/src/Tests/Lexer.idr +++ b/tests/src/Tests/Lexer.idr @@ -105,16 +105,29 @@ tests = "lexer" :- [ acceptsWith' "'a" [Symbol "a"], acceptsWith' "'ab" [Symbol "ab"], acceptsWith' "'_b" [Symbol "_b"], - rejects' "'" + rejects' "'", + rejects' "1abc" ], "numbers" :- [ - acceptsWith' "0" [N Zero], - acceptsWith' "1" [N One], - acceptsWith' "2" [N $ Other 2], - acceptsWith' "69" [N $ Other 69], - acceptsWith' "1_000" [N $ Other 1000], - todo "octal", - todo "hex" + acceptsWith' "0" [N Zero], + acceptsWith' "1" [N One], + acceptsWith' "2" [N $ Other 2], + acceptsWith' "69" [N $ Other 69], + acceptsWith' "1_000" [N $ Other 1000], + acceptsWith' "0o0" [N Zero], + acceptsWith' "0o105" [N $ Other 69], + acceptsWith' "0O0" [N Zero], + acceptsWith' "0O105" [N $ Other 69], + acceptsWith' "0x0" [N Zero], + acceptsWith' "0x45" [N $ Other 69], + acceptsWith' "0xabc" [N $ Other 2748], + acceptsWith' "0xABC" [N $ Other 2748], + acceptsWith' "0xA_BC" [N $ Other 2748], + acceptsWith' "0X0" [N Zero], + acceptsWith' "0X45" [N $ Other 69], + acceptsWith' "0XABC" [N $ Other 2748], + rejects' "1_", + rejects' "1__000" ] ]