From cacb3225a20a187c2b1232324a38516be7bea868 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Mon, 27 Feb 2023 07:27:27 +0100 Subject: [PATCH] unicode stuff --- lib/Quox/Unicode.idr | 153 +++++++++++++++++++++++++++++++++++ lib/on-hold/Quox/Unicode.idr | 140 -------------------------------- lib/quox-lib.ipkg | 1 + 3 files changed, 154 insertions(+), 140 deletions(-) create mode 100644 lib/Quox/Unicode.idr delete mode 100644 lib/on-hold/Quox/Unicode.idr diff --git a/lib/Quox/Unicode.idr b/lib/Quox/Unicode.idr new file mode 100644 index 0000000..5cd337b --- /dev/null +++ b/lib/Quox/Unicode.idr @@ -0,0 +1,153 @@ +module Quox.Unicode + +import Generics.Derive + + +%default total +%language ElabReflection + + +namespace Letter + public export + data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other + %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Mark + public export + data Mark = NonSpacing | SpacingCombining | Enclosing + %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Number + public export + data Number = Decimal | Letter | Other + %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Punctuation + public export + data Punctuation = Connector | Dash | Open | Close + | InitialQuote | FinalQuote | Other + %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Symbol + public export + data Symbol = Math | Currency | Modifier | Other + %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Separator + public export + data Separator = Space | Line | Paragraph + %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Other + public export + data Other = Control | Format | Surrogate | PrivateUse | NotAssigned + %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show] + + +public export +data GeneralCategory += Letter Letter +| Mark Mark +| Number Number +| Punctuation Punctuation +| Symbol Symbol +| Separator Separator +| Other Other +%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show] + +private +%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))" +prim__genCat : Char -> String + +export +genCat : Char -> GeneralCategory +genCat ch = assert_total $ + case prim__genCat ch of + "Lu" => Letter Uppercase + "Ll" => Letter Lowercase + "Lt" => Letter Titlecase + "Lm" => Letter Modifier + "Lo" => Letter Other + "Mn" => Mark NonSpacing + "Mc" => Mark SpacingCombining + "Me" => Mark Enclosing + "Nd" => Number Decimal + "Nl" => Number Letter + "No" => Number Other + "Pc" => Punctuation Connector + "Pd" => Punctuation Dash + "Ps" => Punctuation Open + "Pe" => Punctuation Close + "Pi" => Punctuation InitialQuote + "Pf" => Punctuation FinalQuote + "Po" => Punctuation Other + "Sm" => Symbol Math + "Sc" => Symbol Currency + "Sk" => Symbol Modifier + "So" => Symbol Other + "Zs" => Separator Space + "Zl" => Separator Line + "Zp" => Separator Paragraph + "Cc" => Other Control + "Cf" => Other Format + "Cs" => Other Surrogate + "Co" => Other PrivateUse + "Cn" => Other NotAssigned + cat => idris_crash #"Quox.Unicode.genCat: unknown category "\{cat}""# + + +namespace GeneralCategory + public export %inline + isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther : + GeneralCategory -> Bool + isLetter = \case Letter _ => True; _ => False + isMark = \case Mark _ => True; _ => False + isNumber = \case Number _ => True; _ => False + isPunctuation = \case Punctuation _ => True; _ => False + isSymbol = \case Symbol _ => True; _ => False + isSeparator = \case Separator _ => True; _ => False + isOther = \case Other _ => True; _ => False + +namespace Char + public export %inline + isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther : + Char -> Bool + isLetter = isLetter . genCat + isMark = isMark . genCat + isNumber = isNumber . genCat + isPunctuation = isPunctuation . genCat + isSymbol = isSymbol . genCat + isSeparator = isSeparator . genCat + isOther = isOther . genCat + + +export +isIdStart : Char -> Bool +isIdStart ch = isLetter ch || isNumber ch && not ('0' <= ch && ch <= '9') + +export +isIdCont : Char -> Bool +isIdCont ch = isIdStart ch || ch == '\'' || isMark ch || isNumber ch + +export +isIdConnector : Char -> Bool +isIdConnector ch = genCat ch == Punctuation Connector + + +export +isSymChar : Char -> Bool +isSymChar ch = + case genCat ch of + Symbol _ => True + Punctuation Dash => True + Punctuation Other => True + _ => False + +export +isWhitespace : Char -> Bool +isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || isSeparator ch + + +export +%foreign "scheme:string-normalize-nfc" +normalizeNfc : String -> String diff --git a/lib/on-hold/Quox/Unicode.idr b/lib/on-hold/Quox/Unicode.idr deleted file mode 100644 index bcaf956..0000000 --- a/lib/on-hold/Quox/Unicode.idr +++ /dev/null @@ -1,140 +0,0 @@ -module Quox.Unicode - -import Generics.Derive - - -%default total -%language ElabReflection - - -namespace Letter - public export - data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other - %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Mark - public export - data Mark = NonSpacing | SpacingCombining | Enclosing - %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Number - public export - data Number = Decimal | Letter | Other - %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Punctuation - public export - data Punctuation = Connector | Dash | Open | Close - | InitialQuote | FinalQuote | Other - %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Symbol - public export - data Symbol = Math | Currency | Modifier | Other - %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Separator - public export - data Separator = Space | Line | Paragraph - %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show] - -namespace Other - public export - data Other = Control | Format | Surrogate | PrivateUse | NotAssigned - %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show] - - -public export -data GeneralCategory -= Letter Letter -| Mark Mark -| Number Number -| Punctuation Punctuation -| Symbol Symbol -| Separator Separator -| Other Other -%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show] - - -private -%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))" -prim__genCat : Char -> String - -export -genCat : Char -> GeneralCategory -genCat ch = assert_total $ - case prim__genCat ch of - "Lu" => Letter Uppercase - "Ll" => Letter Lowercase - "Lt" => Letter Titlecase - "Lm" => Letter Modifier - "Lo" => Letter Other - "Mn" => Mark NonSpacing - "Mc" => Mark SpacingCombining - "Me" => Mark Enclosing - "Nd" => Number Decimal - "Nl" => Number Letter - "No" => Number Other - "Pc" => Punctuation Connector - "Pd" => Punctuation Dash - "Ps" => Punctuation Open - "Pe" => Punctuation Close - "Pi" => Punctuation InitialQuote - "Pf" => Punctuation FinalQuote - "Po" => Punctuation Other - "Sm" => Symbol Math - "Sc" => Symbol Currency - "Sk" => Symbol Modifier - "So" => Symbol Other - "Zs" => Separator Space - "Zl" => Separator Line - "Zp" => Separator Paragraph - "Cc" => Other Control - "Cf" => Other Format - "Cs" => Other Surrogate - "Co" => Other PrivateUse - "Cn" => Other NotAssigned - - -export -isIdStart : Char -> Bool -isIdStart ch = - case genCat ch of - Letter _ => True - Number _ => not ('0' <= ch && ch <= '9') - _ => False - -export -isIdCont : Char -> Bool -isIdCont ch = - isIdStart ch || ch == '\'' || - case genCat ch of - Mark _ => True - Number _ => True - _ => False - -export -isIdConnector : Char -> Bool -isIdConnector ch = - case genCat ch of Punctuation Connector => True; _ => False - - -export -isSymChar : Char -> Bool -isSymChar ch = - case genCat ch of - Symbol _ => True - Punctuation Dash => True - Punctuation Other => True - _ => False - -export -isWhitespace : Char -> Bool -isWhitespace ch = - ch == '\t' || ch == '\r' || ch == '\n' || - case genCat ch of Separator _ => True; _ => False - - -export -%foreign "scheme:string-normalize-nfc" -normalizeNfc : String -> String diff --git a/lib/quox-lib.ipkg b/lib/quox-lib.ipkg index d24fae4..325eebe 100644 --- a/lib/quox-lib.ipkg +++ b/lib/quox-lib.ipkg @@ -11,6 +11,7 @@ modules = Quox.NatExtra, Quox.BoolExtra, Quox.Decidable, + Quox.Unicode, Quox.No, Quox.Pretty, Quox.Syntax,