diff --git a/lib/Quox/CharExtra.idr b/lib/Quox/CharExtra.idr new file mode 100644 index 0000000..c9f718b --- /dev/null +++ b/lib/Quox/CharExtra.idr @@ -0,0 +1,130 @@ +module Quox.CharExtra + +import Generics.Derive + + +%default total +%language ElabReflection + + +namespace Letter + public export + data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other + %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Mark + public export + data Mark = NonSpacing | SpacingCombining | Enclosing + %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Number + public export + data Number = Decimal | Letter | Other + %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Punctuation + public export + data Punctuation = Connector | Dash | Open | Close + | InitialQuote | FinalQuote | Other + %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Symbol + public export + data Symbol = Math | Currency | Modifier | Other + %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Separator + public export + data Separator = Space | Line | Paragraph + %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show] + +namespace Other + public export + data Other = Control | Format | Surrogate | PrivateUse | NotAssigned + %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show] + + +public export +data GeneralCategory += Letter Letter +| Mark Mark +| Number Number +| Punctuation Punctuation +| Symbol Symbol +| Separator Separator +| Other Other +%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show] + + +private +%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))" +prim__genCat : Char -> String + +export +genCat : Char -> GeneralCategory +genCat ch = assert_total $ + case prim__genCat ch of + "Lu" => Letter Uppercase + "Ll" => Letter Lowercase + "Lt" => Letter Titlecase + "Lm" => Letter Modifier + "Lo" => Letter Other + "Mn" => Mark NonSpacing + "Mc" => Mark SpacingCombining + "Me" => Mark Enclosing + "Nd" => Number Decimal + "Nl" => Number Letter + "No" => Number Other + "Pc" => Punctuation Connector + "Pd" => Punctuation Dash + "Ps" => Punctuation Open + "Pe" => Punctuation Close + "Pi" => Punctuation InitialQuote + "Pf" => Punctuation FinalQuote + "Po" => Punctuation Other + "Sm" => Symbol Math + "Sc" => Symbol Currency + "Sk" => Symbol Modifier + "So" => Symbol Other + "Zs" => Separator Space + "Zl" => Separator Line + "Zp" => Separator Paragraph + "Cc" => Other Control + "Cf" => Other Format + "Cs" => Other Surrogate + "Co" => Other PrivateUse + "Cn" => Other NotAssigned + + +export +isPrintable : Char -> Bool +isPrintable ch = case genCat ch of Other _ => False; _ => True + +export +isIdStart : Char -> Bool +isIdStart ch = + case genCat ch of + Letter _ => True + Punctuation Connector => True -- _, tie bars, etc + _ => False + +export +isIdCont : Char -> Bool +isIdCont ch = + isIdStart ch || ch == '\'' || + case genCat ch of + Mark _ => True + Number _ => True + _ => False + +export +isSymChar : Char -> Bool +isSymChar ch = + case genCat ch of + Symbol _ => True + Punctuation Other => True + _ => False + +export +isWhitespace : Char -> Bool +isWhitespace ch = case genCat ch of Separator _ => True; _ => False