module Quox.Unicode import Generics.Derive %default total %language ElabReflection namespace Letter public export data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Mark public export data Mark = NonSpacing | SpacingCombining | Enclosing %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Number public export data Number = Decimal | Letter | Other %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Punctuation public export data Punctuation = Connector | Dash | Open | Close | InitialQuote | FinalQuote | Other %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Symbol public export data Symbol = Math | Currency | Modifier | Other %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Separator public export data Separator = Space | Line | Paragraph %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show] namespace Other public export data Other = Control | Format | Surrogate | PrivateUse | NotAssigned %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show] public export data GeneralCategory = Letter Letter | Mark Mark | Number Number | Punctuation Punctuation | Symbol Symbol | Separator Separator | Other Other %runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show] private %foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))" prim__genCat : Char -> String export genCat : Char -> GeneralCategory genCat ch = assert_total $ case prim__genCat ch of "Lu" => Letter Uppercase "Ll" => Letter Lowercase "Lt" => Letter Titlecase "Lm" => Letter Modifier "Lo" => Letter Other "Mn" => Mark NonSpacing "Mc" => Mark SpacingCombining "Me" => Mark Enclosing "Nd" => Number Decimal "Nl" => Number Letter "No" => Number Other "Pc" => Punctuation Connector "Pd" => Punctuation Dash "Ps" => Punctuation Open "Pe" => Punctuation Close "Pi" => Punctuation InitialQuote "Pf" => Punctuation FinalQuote "Po" => Punctuation Other "Sm" => Symbol Math "Sc" => Symbol Currency "Sk" => Symbol Modifier "So" => Symbol Other "Zs" => Separator Space "Zl" => Separator Line "Zp" => Separator Paragraph "Cc" => Other Control "Cf" => Other Format "Cs" => Other Surrogate "Co" => Other PrivateUse "Cn" => Other NotAssigned export isIdStart : Char -> Bool isIdStart ch = case genCat ch of Letter _ => True Number _ => not ('0' <= ch && ch <= '9') _ => False export isIdCont : Char -> Bool isIdCont ch = isIdStart ch || ch == '\'' || case genCat ch of Mark _ => True Number _ => True _ => False export isIdConnector : Char -> Bool isIdConnector ch = case genCat ch of Punctuation Connector => True; _ => False export isSymChar : Char -> Bool isSymChar ch = case genCat ch of Symbol _ => True Punctuation Dash => True Punctuation Other => True _ => False export isWhitespace : Char -> Bool isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || case genCat ch of Separator _ => True; _ => False export %foreign "scheme:string-normalize-nfc" normalizeNfc : String -> String