quox/lib/Quox/CharExtra.idr

169 lines
4.5 KiB
Idris

module Quox.CharExtra
import Derive.Prelude
%default total
%language ElabReflection
namespace Letter
public export
data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other
%runElab derive "Letter" [Eq, Ord, Show]
namespace Mark
public export
data Mark = NonSpacing | SpacingCombining | Enclosing
%runElab derive "Mark" [Eq, Ord, Show]
namespace Number
public export
data Number = Decimal | Letter | Other
%runElab derive "Number" [Eq, Ord, Show]
namespace Punctuation
public export
data Punctuation = Connector | Dash | Open | Close
| InitialQuote | FinalQuote | Other
%runElab derive "Punctuation" [Eq, Ord, Show]
namespace Symbol
public export
data Symbol = Math | Currency | Modifier | Other
%runElab derive "Symbol" [Eq, Ord, Show]
namespace Separator
public export
data Separator = Space | Line | Paragraph
%runElab derive "Separator" [Eq, Ord, Show]
namespace Other
public export
data Other = Control | Format | Surrogate | PrivateUse | NotAssigned
%runElab derive "Other" [Eq, Ord, Show]
public export
data GeneralCategory
= Letter Letter
| Mark Mark
| Number Number
| Punctuation Punctuation
| Symbol Symbol
| Separator Separator
| Other Other
%runElab derive "GeneralCategory" [Eq, Ord, Show]
private
%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
prim__genCat : Char -> String
export
genCat : Char -> GeneralCategory
genCat ch = assert_total $
case prim__genCat ch of
"Lu" => Letter Uppercase
"Ll" => Letter Lowercase
"Lt" => Letter Titlecase
"Lm" => Letter Modifier
"Lo" => Letter Other
"Mn" => Mark NonSpacing
"Mc" => Mark SpacingCombining
"Me" => Mark Enclosing
"Nd" => Number Decimal
"Nl" => Number Letter
"No" => Number Other
"Pc" => Punctuation Connector
"Pd" => Punctuation Dash
"Ps" => Punctuation Open
"Pe" => Punctuation Close
"Pi" => Punctuation InitialQuote
"Pf" => Punctuation FinalQuote
"Po" => Punctuation Other
"Sm" => Symbol Math
"Sc" => Symbol Currency
"Sk" => Symbol Modifier
"So" => Symbol Other
"Zs" => Separator Space
"Zl" => Separator Line
"Zp" => Separator Paragraph
"Cc" => Other Control
"Cf" => Other Format
"Cs" => Other Surrogate
"Co" => Other PrivateUse
"Cn" => Other NotAssigned
cat => idris_crash #"Quox.Unicode.genCat: unknown category "\{cat}""#
namespace GeneralCategory
public export %inline
isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther :
GeneralCategory -> Bool
isLetter = \case Letter _ => True; _ => False
isMark = \case Mark _ => True; _ => False
isNumber = \case Number _ => True; _ => False
isPunctuation = \case Punctuation _ => True; _ => False
isSymbol = \case Symbol _ => True; _ => False
isSeparator = \case Separator _ => True; _ => False
isOther = \case Other _ => True; _ => False
namespace Char
public export %inline
isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator, isOther :
Char -> Bool
isLetter = isLetter . genCat
isMark = isMark . genCat
isNumber = isNumber . genCat
isPunctuation = isPunctuation . genCat
isSymbol = isSymbol . genCat
isSeparator = isSeparator . genCat
isOther = isOther . genCat
export
isSupDigit : Char -> Bool
isSupDigit ch = ch `elem` unpack "⁰¹²³⁴⁵⁶⁷⁸⁹"
export
isSubDigit : Char -> Bool
isSubDigit ch = ch `elem` unpack "₀₁₂₃₄₅₆₇₈₉"
export
isAsciiDigit : Char -> Bool
isAsciiDigit ch = '0' <= ch && ch <= '9'
export
isIdStart : Char -> Bool
isIdStart ch =
(ch == '_' || isLetter ch || isNumber ch) &&
not (isSupDigit ch || isAsciiDigit ch)
export
isIdCont : Char -> Bool
isIdCont ch =
(isIdStart ch || ch == '\'' || ch == '-' || isMark ch || isNumber ch) &&
not (isSupDigit ch)
export
isIdConnector : Char -> Bool
isIdConnector ch = genCat ch == Punctuation Connector
export
isSymChar : Char -> Bool
isSymChar ch = case genCat ch of
Symbol _ => True
Punctuation Dash => True
Punctuation Other => True
_ => False
export
isWhitespace : Char -> Bool
isWhitespace ch = ch == '\t' || ch == '\r' || ch == '\n' || isSeparator ch
export
%foreign "scheme:string-normalize-nfc"
normalizeNfc : String -> String