unicode stuff
This commit is contained in:
parent
28356200c1
commit
cacb3225a2
3 changed files with 154 additions and 140 deletions
|
@ -1,140 +0,0 @@
|
|||
module Quox.Unicode
|
||||
|
||||
import Generics.Derive
|
||||
|
||||
|
||||
%default total
|
||||
%language ElabReflection
|
||||
|
||||
|
||||
namespace Letter
  ||| Subcategories of the "Letter" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Letter
    = Uppercase  -- "Lu"
    | Lowercase  -- "Ll"
    | Titlecase  -- "Lt"
    | Modifier   -- "Lm"
    | Other      -- "Lo"
  %runElab derive "Letter" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Mark
  ||| Subcategories of the "Mark" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Mark
    = NonSpacing        -- "Mn"
    | SpacingCombining  -- "Mc"
    | Enclosing         -- "Me"
  %runElab derive "Mark" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Number
  ||| Subcategories of the "Number" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Number
    = Decimal  -- "Nd"
    | Letter   -- "Nl"
    | Other    -- "No"
  %runElab derive "Number" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Punctuation
  ||| Subcategories of the "Punctuation" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Punctuation
    = Connector     -- "Pc"
    | Dash          -- "Pd"
    | Open          -- "Ps"
    | Close         -- "Pe"
    | InitialQuote  -- "Pi"
    | FinalQuote    -- "Pf"
    | Other         -- "Po"
  %runElab derive "Punctuation" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Symbol
  ||| Subcategories of the "Symbol" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Symbol
    = Math      -- "Sm"
    | Currency  -- "Sc"
    | Modifier  -- "Sk"
    | Other     -- "So"
  %runElab derive "Symbol" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Separator
  ||| Subcategories of the "Separator" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Separator
    = Space      -- "Zs"
    | Line       -- "Zl"
    | Paragraph  -- "Zp"
  %runElab derive "Separator" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
namespace Other
  ||| Subcategories of the "Other" general category.
  |||
  ||| The trailing comments give the two-letter category name that `genCat`
  ||| maps to each constructor.
  public export
  data Other
    = Control      -- "Cc"
    | Format       -- "Cf"
    | Surrogate    -- "Cs"
    | PrivateUse   -- "Co"
    | NotAssigned  -- "Cn"
  %runElab derive "Other" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
||| A character's general category: one of seven major classes, each
||| constructor carrying the subcategory type of the same name.
public export
data GeneralCategory =
    Letter      Letter
  | Mark        Mark
  | Number      Number
  | Punctuation Punctuation
  | Symbol      Symbol
  | Separator   Separator
  | Other       Other
%runElab derive "GeneralCategory" [Generic, Meta, Eq, Ord, DecEq, Show]
|
||||
|
||||
|
||||
||| Raw backend lookup of a character's general category.
|||
||| Calls the Scheme procedure `char-general-category` on the character and
||| converts the resulting symbol to a string with `symbol->string`.
||| Only a `scheme` implementation is provided.
private
%foreign "scheme:(lambda (c) (symbol->string (char-general-category c)))"
prim__genCat : Char -> String
|
||||
|
||||
||| Classify a character by its general category.
|||
||| The two-letter category name returned by `prim__genCat` is translated to
||| the matching `GeneralCategory` value. A name outside the thirty listed
||| below aborts with a message that includes the offending string.
export
genCat : Char -> GeneralCategory
genCat ch = assert_total $
  case prim__genCat ch of
    -- letters
    "Lu" => Letter Uppercase
    "Ll" => Letter Lowercase
    "Lt" => Letter Titlecase
    "Lm" => Letter Modifier
    "Lo" => Letter Other
    -- marks
    "Mn" => Mark NonSpacing
    "Mc" => Mark SpacingCombining
    "Me" => Mark Enclosing
    -- numbers
    "Nd" => Number Decimal
    "Nl" => Number Letter
    "No" => Number Other
    -- punctuation
    "Pc" => Punctuation Connector
    "Pd" => Punctuation Dash
    "Ps" => Punctuation Open
    "Pe" => Punctuation Close
    "Pi" => Punctuation InitialQuote
    "Pf" => Punctuation FinalQuote
    "Po" => Punctuation Other
    -- symbols
    "Sm" => Symbol Math
    "Sc" => Symbol Currency
    "Sk" => Symbol Modifier
    "So" => Symbol Other
    -- separators
    "Zs" => Separator Space
    "Zl" => Separator Line
    "Zp" => Separator Paragraph
    -- other
    "Cc" => Other Control
    "Cf" => Other Format
    "Cs" => Other Surrogate
    "Co" => Other PrivateUse
    "Cn" => Other NotAssigned
    -- Explicit fallback: previously an unlisted name fell off the end of the
    -- match with no diagnostic. `assert_total` stays because this branch
    -- deliberately crashes.
    cat  => idris_crash ("genCat: unrecognised general category " ++ show cat)
|
||||
|
||||
|
||||
||| Whether `ch` may begin an identifier.
|||
||| True for every `Letter`, and for every `Number` except the ASCII digits
||| '0' to '9'; false for all other categories.
export
isIdStart : Char -> Bool
isIdStart ch = startsId (genCat ch)
  where
    asciiDigit : Bool
    asciiDigit = ch >= '0' && ch <= '9'

    startsId : GeneralCategory -> Bool
    startsId (Letter _) = True
    startsId (Number _) = not asciiDigit
    startsId _          = False
|
||||
|
||||
||| Whether `ch` may appear after the first character of an identifier.
|||
||| True for anything accepted by `isIdStart`, for the apostrophe, and for
||| every `Mark` or `Number` character (ASCII digits included).
export
isIdCont : Char -> Bool
isIdCont ch =
  if isIdStart ch || ch == '\'' then True
  else case genCat ch of
         Mark _   => True
         Number _ => True
         _        => False
|
||||
|
||||
||| Whether `ch` belongs to the `Punctuation Connector` category.
export
isIdConnector : Char -> Bool
isIdConnector ch = connects (genCat ch)
  where
    connects : GeneralCategory -> Bool
    connects (Punctuation Connector) = True
    connects _                       = False
|
||||
|
||||
|
||||
||| Whether `ch` counts as a symbol character.
|||
||| True for every `Symbol`, and for punctuation in the `Dash` or `Other`
||| subcategories; false for everything else.
export
isSymChar : Char -> Bool
isSymChar ch = symbolic (genCat ch)
  where
    symbolic : GeneralCategory -> Bool
    symbolic (Symbol _)            = True
    symbolic (Punctuation Dash)    = True
    symbolic (Punctuation Other)   = True
    symbolic _                     = False
|
||||
|
||||
||| Whether `ch` is treated as whitespace.
|||
||| True for tab, carriage return and line feed, and for every character in
||| a `Separator` category.
export
isWhitespace : Char -> Bool
isWhitespace ch = asciiBlank || separates (genCat ch)
  where
    asciiBlank : Bool
    asciiBlank = ch == '\t' || ch == '\r' || ch == '\n'

    separates : GeneralCategory -> Bool
    separates (Separator _) = True
    separates _             = False
|
||||
|
||||
|
||||
||| Binding to the Scheme procedure `string-normalize-nfc`.
|||
||| Going by that procedure's name, the result is the argument converted to
||| Unicode Normalization Form C. Only a `scheme` implementation is provided.
export
%foreign "scheme:string-normalize-nfc"
normalizeNfc : String -> String
|
Loading…
Add table
Add a link
Reference in a new issue