quox/lib/Quox/Unicode.idr
2022-05-12 07:42:33 +02:00

140 lines
3.6 KiB
Idris

module Quox.Unicode
import Generics.Derive
%default total
%language ElabReflection
namespace Letter
public export
data Letter = Uppercase | Lowercase | Titlecase | Modifier | Other
%runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Mark
public export
data Mark = NonSpacing | SpacingCombining | Enclosing
%runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Number
public export
data Number = Decimal | Letter | Other
%runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Punctuation
public export
data Punctuation = Connector | Dash | Open | Close
| InitialQuote | FinalQuote | Other
%runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Symbol
public export
data Symbol = Math | Currency | Modifier | Other
%runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Separator
public export
data Separator = Space | Line | Paragraph
%runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show]
namespace Other
public export
data Other = Control | Format | Surrogate | PrivateUse | NotAssigned
%runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show]
public export
data GeneralCategory
= Letter Letter
| Mark Mark
| Number Number
| Punctuation Punctuation
| Symbol Symbol
| Separator Separator
| Other Other
%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show]
private
%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
prim__genCat : Char -> String
export
genCat : Char -> GeneralCategory
genCat ch = assert_total $
case prim__genCat ch of
"Lu" => Letter Uppercase
"Ll" => Letter Lowercase
"Lt" => Letter Titlecase
"Lm" => Letter Modifier
"Lo" => Letter Other
"Mn" => Mark NonSpacing
"Mc" => Mark SpacingCombining
"Me" => Mark Enclosing
"Nd" => Number Decimal
"Nl" => Number Letter
"No" => Number Other
"Pc" => Punctuation Connector
"Pd" => Punctuation Dash
"Ps" => Punctuation Open
"Pe" => Punctuation Close
"Pi" => Punctuation InitialQuote
"Pf" => Punctuation FinalQuote
"Po" => Punctuation Other
"Sm" => Symbol Math
"Sc" => Symbol Currency
"Sk" => Symbol Modifier
"So" => Symbol Other
"Zs" => Separator Space
"Zl" => Separator Line
"Zp" => Separator Paragraph
"Cc" => Other Control
"Cf" => Other Format
"Cs" => Other Surrogate
"Co" => Other PrivateUse
"Cn" => Other NotAssigned
export
isIdStart : Char -> Bool
isIdStart ch =
case genCat ch of
Letter _ => True
Number _ => not ('0' <= ch && ch <= '9')
_ => False
export
isIdCont : Char -> Bool
isIdCont ch =
isIdStart ch || ch == '\'' ||
case genCat ch of
Mark _ => True
Number _ => True
_ => False
export
isIdConnector : Char -> Bool
isIdConnector ch =
case genCat ch of Punctuation Connector => True; _ => False
export
isSymChar : Char -> Bool
isSymChar ch =
case genCat ch of
Symbol _ => True
Punctuation Dash => True
Punctuation Other => True
_ => False
export
isWhitespace : Char -> Bool
isWhitespace ch =
ch == '\t' || ch == '\r' || ch == '\n' ||
case genCat ch of Separator _ => True; _ => False
export
%foreign "scheme:string-normalize-nfc"
normalizeNfc : String -> String