module Quox.CharExtra import Derive.Prelude %default total %language ElabReflection namespace Letter public export data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other %runElab derive "Letter" [Eq, Ord, Show] namespace Mark public export data Mark = NonSpacing | SpacingCombining | Enclosing %runElab derive "Mark" [Eq, Ord, Show] namespace Number public export data Number = Decimal | Letter | Other %runElab derive "Number" [Eq, Ord, Show] namespace Punctuation public export data Punctuation = Connector | Dash | Open | Close | InitialQuote | FinalQuote | Other %runElab derive "Punctuation" [Eq, Ord, Show] namespace Symbol public export data Symbol = Math | Currency | Modifier | Other %runElab derive "Symbol" [Eq, Ord, Show] namespace Separator public export data Separator = Space | Line | Paragraph %runElab derive "Separator" [Eq, Ord, Show] namespace Other public export data Other = Control | Format | Surrogate | PrivateUse | NotAssigned %runElab derive "Other" [Eq, Ord, Show] public export data GeneralCategory = Letter Letter | Mark Mark | Number Number | Punctuation Punctuation | Symbol Symbol | Separator Separator | Other Other %runElab derive "GeneralCategory" [Eq, Ord, Show] private %foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))" prim__genCat : Char -> String export genCat : Char -> GeneralCategory genCat ch = assert_total $ case prim__genCat ch of "Lu" => Letter Uppercase "Ll" => Letter Lowercase "Lt" => Letter Titlecase "Lm" => Letter Modifier "Lo" => Letter Other "Mn" => Mark NonSpacing "Mc" => Mark SpacingCombining "Me" => Mark Enclosing "Nd" => Number Decimal "Nl" => Number Letter "No" => Number Other "Pc" => Punctuation Connector "Pd" => Punctuation Dash "Ps" => Punctuation Open "Pe" => Punctuation Close "Pi" => Punctuation InitialQuote "Pf" => Punctuation FinalQuote "Po" => Punctuation Other "Sm" => Symbol Math "Sc" => Symbol Currency "Sk" => Symbol Modifier "So" => Symbol Other "Zs" => Separator Space "Zl" => Separator Line "Zp" => Separator Paragraph "Cc" => Other Control "Cf" => Other Format "Cs" => Other Surrogate "Co" => Other PrivateUse "Cn" => Other NotAssigned cat => idris_crash #"Quox.Unicode.genCat: unknown category "\{cat}""# namespace GeneralCategory public export %inline isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther : GeneralCategory -> Bool isLetter = \case Letter _ => True; _ => False isMark = \case Mark _ => True; _ => False isNumber = \case Number _ => True; _ => False isPunctuation = \case Punctuation _ => True; _ => False isSymbol = \case Symbol _ => True; _ => False isSeparator = \case Separator _ => True; _ => False isOther = \case Other _ => True; _ => False namespace Char public export %inline isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther : Char -> Bool isLetter = isLetter . genCat isMark = isMark . genCat isNumber = isNumber . genCat isPunctuation = isPunctuation . genCat isSymbol = isSymbol . genCat isSeparator = isSeparator . genCat isOther = isOther . genCat export isSupDigit : Char -> Bool isSupDigit ch = ch `elem` unpack "⁰¹²³⁴⁵⁶⁷⁸⁹" export isSubDigit : Char -> Bool isSubDigit ch = ch `elem` unpack "₀₁₂₃₄₅₆₇₈₉" export isAsciiDigit : Char -> Bool isAsciiDigit ch = '0' <= ch && ch <= '9' export isIdStart : Char -> Bool isIdStart ch = (ch == '_' || isLetter ch || isNumber ch) && not (isSupDigit ch || isAsciiDigit ch) export isIdCont : Char -> Bool isIdCont ch = (isIdStart ch || ch == '\'' || ch == '-' || isMark ch || isNumber ch) && not (isSupDigit ch) export isIdConnector : Char -> Bool isIdConnector ch = genCat ch == Punctuation Connector export isSymChar : Char -> Bool isSymChar ch = case genCat ch of Symbol _ => True Punctuation Dash => True Punctuation Other => True _ => False export isWhitespace : Char -> Bool isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || isSeparator ch export %foreign "scheme:string-normalize-nfc" normalizeNfc : String -> String