switch unsafe to bounded + char encoding
This commit is contained in:
parent
22dfde2936
commit
9a14600a38
3 changed files with 152 additions and 29 deletions
|
@ -37,7 +37,7 @@ flag checked
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Data.ByteArray.Builder.Small
|
Data.ByteArray.Builder.Small
|
||||||
Data.ByteArray.Builder.Small.Unsafe
|
Data.ByteArray.Builder.Small.Bounded
|
||||||
build-depends:
|
build-depends:
|
||||||
, base >=4.12.0.0 && <5
|
, base >=4.12.0.0 && <5
|
||||||
, byteslice >=0.1 && <0.2
|
, byteslice >=0.1 && <0.2
|
||||||
|
|
|
@ -7,10 +7,10 @@
|
||||||
{-# language UnboxedTuples #-}
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
module Data.ByteArray.Builder.Small
|
module Data.ByteArray.Builder.Small
|
||||||
( -- * Unsafe Primitives
|
( -- * Bounded Primitives
|
||||||
Builder(..)
|
Builder(..)
|
||||||
, construct
|
, construct
|
||||||
, fromUnsafe
|
, fromBounded
|
||||||
-- * Evaluation
|
-- * Evaluation
|
||||||
, run
|
, run
|
||||||
, pasteST
|
, pasteST
|
||||||
|
@ -34,10 +34,13 @@ module Data.ByteArray.Builder.Small
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
, word16PaddedUpperHex
|
, word16PaddedUpperHex
|
||||||
, word8PaddedUpperHex
|
, word8PaddedUpperHex
|
||||||
|
, ascii
|
||||||
|
, char
|
||||||
-- ** Machine-Readable
|
-- ** Machine-Readable
|
||||||
, word64BE
|
, word64BE
|
||||||
, word32BE
|
, word32BE
|
||||||
, word16BE
|
, word16BE
|
||||||
|
, word8
|
||||||
-- * Encode Floating-Point Types
|
-- * Encode Floating-Point Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, doubleDec
|
, doubleDec
|
||||||
|
@ -62,7 +65,7 @@ import qualified GHC.Exts as Exts
|
||||||
import qualified Data.Text.Short as TS
|
import qualified Data.Text.Short as TS
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
|
import qualified Data.ByteArray.Builder.Small.Bounded as Bounded
|
||||||
|
|
||||||
-- | An unmaterialized sequence of bytes that may be pasted
|
-- | An unmaterialized sequence of bytes that may be pasted
|
||||||
-- into a mutable byte array.
|
-- into a mutable byte array.
|
||||||
|
@ -183,9 +186,9 @@ construct f = Builder
|
||||||
Nothing -> (# s1, (-1#) #)
|
Nothing -> (# s1, (-1#) #)
|
||||||
Just (I# n) -> (# s1, n #)
|
Just (I# n) -> (# s1, n #)
|
||||||
|
|
||||||
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
|
fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder
|
||||||
{-# inline fromUnsafe #-}
|
{-# inline fromBounded #-}
|
||||||
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
|
fromBounded (Bounded.Builder f) = Builder $ \arr off len s0 ->
|
||||||
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
||||||
I# req -> case len >=# req of
|
I# req -> case len >=# req of
|
||||||
1# -> f arr off s0
|
1# -> f arr off s0
|
||||||
|
@ -223,8 +226,8 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
|
||||||
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
||||||
else do
|
else do
|
||||||
write2 dst doff '\\' 'u'
|
write2 dst doff '\\' 'u'
|
||||||
doff' <- Unsafe.pasteST
|
doff' <- Bounded.pasteST
|
||||||
(Unsafe.word16PaddedUpperHex (fromIntegral (c2w c)))
|
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
|
||||||
dst (doff + 2)
|
dst (doff + 2)
|
||||||
go (soff + 1) (slen - 1) doff'
|
go (soff + 1) (slen - 1) doff'
|
||||||
else pure doff
|
else pure doff
|
||||||
|
@ -262,61 +265,70 @@ shortTextJsonString a =
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word64Dec :: Word64 -> Builder
|
word64Dec :: Word64 -> Builder
|
||||||
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
word64Dec w = fromBounded (Bounded.word64Dec w)
|
||||||
|
|
||||||
-- | Encodes an unsigned 16-bit integer as decimal.
|
-- | Encodes an unsigned 16-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word32Dec :: Word32 -> Builder
|
word32Dec :: Word32 -> Builder
|
||||||
word32Dec w = fromUnsafe (Unsafe.word32Dec w)
|
word32Dec w = fromBounded (Bounded.word32Dec w)
|
||||||
|
|
||||||
-- | Encodes an unsigned 16-bit integer as decimal.
|
-- | Encodes an unsigned 16-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word16Dec :: Word16 -> Builder
|
word16Dec :: Word16 -> Builder
|
||||||
word16Dec w = fromUnsafe (Unsafe.word16Dec w)
|
word16Dec w = fromBounded (Bounded.word16Dec w)
|
||||||
|
|
||||||
-- | Encode a double-floating-point number, using decimal notation or
|
-- | Encode a double-floating-point number, using decimal notation or
|
||||||
-- scientific notation depending on the magnitude. This has undefined
|
-- scientific notation depending on the magnitude. This has undefined
|
||||||
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
||||||
-- crash, but the generated numbers will be nonsense.
|
-- crash, but the generated numbers will be nonsense.
|
||||||
doubleDec :: Double -> Builder
|
doubleDec :: Double -> Builder
|
||||||
doubleDec w = fromUnsafe (Unsafe.doubleDec w)
|
doubleDec w = fromBounded (Bounded.doubleDec w)
|
||||||
|
|
||||||
-- | Encodes a signed 64-bit integer as decimal.
|
-- | Encodes a signed 64-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the argument was zero.
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
-- Negative numbers are preceded by a minus sign. Positive numbers
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
-- are not preceded by anything.
|
-- are not preceded by anything.
|
||||||
int64Dec :: Int64 -> Builder
|
int64Dec :: Int64 -> Builder
|
||||||
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
|
int64Dec w = fromBounded (Bounded.int64Dec w)
|
||||||
|
|
||||||
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
||||||
word64PaddedUpperHex :: Word64 -> Builder
|
word64PaddedUpperHex :: Word64 -> Builder
|
||||||
word64PaddedUpperHex w =
|
word64PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word64PaddedUpperHex w)
|
fromBounded (Bounded.word64PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 8 digits. This uses uppercase for the alphabetical
|
-- the encoding to 8 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @000003FE@.
|
-- digits. For example, this encodes the number 1022 as @000003FE@.
|
||||||
word32PaddedUpperHex :: Word32 -> Builder
|
word32PaddedUpperHex :: Word32 -> Builder
|
||||||
word32PaddedUpperHex w =
|
word32PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word32PaddedUpperHex w)
|
fromBounded (Bounded.word32PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 4 digits. This uses uppercase for the alphabetical
|
-- the encoding to 4 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @03FE@.
|
-- digits. For example, this encodes the number 1022 as @03FE@.
|
||||||
word16PaddedUpperHex :: Word16 -> Builder
|
word16PaddedUpperHex :: Word16 -> Builder
|
||||||
word16PaddedUpperHex w =
|
word16PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word16PaddedUpperHex w)
|
fromBounded (Bounded.word16PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 2 digits. This uses uppercase for the alphabetical
|
-- the encoding to 2 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 11 as @0B@.
|
-- digits. For example, this encodes the number 11 as @0B@.
|
||||||
word8PaddedUpperHex :: Word8 -> Builder
|
word8PaddedUpperHex :: Word8 -> Builder
|
||||||
word8PaddedUpperHex w =
|
word8PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word8PaddedUpperHex w)
|
fromBounded (Bounded.word8PaddedUpperHex w)
|
||||||
|
|
||||||
|
-- | Encode an ASCII char.
|
||||||
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
|
ascii :: Char -> Builder
|
||||||
|
ascii c = fromBounded (Bounded.char c)
|
||||||
|
|
||||||
|
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||||
|
char :: Char -> Builder
|
||||||
|
char c = fromBounded (Bounded.char c)
|
||||||
|
|
||||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||||
unST (ST f) = f
|
unST (ST f) = f
|
||||||
|
@ -328,17 +340,20 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||||
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word64BE :: Word64 -> Builder
|
word64BE :: Word64 -> Builder
|
||||||
word64BE w = fromUnsafe (Unsafe.word64BE w)
|
word64BE w = fromBounded (Bounded.word64BE w)
|
||||||
|
|
||||||
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word32BE :: Word32 -> Builder
|
word32BE :: Word32 -> Builder
|
||||||
word32BE w = fromUnsafe (Unsafe.word32BE w)
|
word32BE w = fromBounded (Bounded.word32BE w)
|
||||||
|
|
||||||
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word16BE :: Word16 -> Builder
|
word16BE :: Word16 -> Builder
|
||||||
word16BE w = fromUnsafe (Unsafe.word16BE w)
|
word16BE w = fromBounded (Bounded.word16BE w)
|
||||||
|
|
||||||
|
word8 :: Word8 -> Builder
|
||||||
|
word8 w = fromBounded (Bounded.word8 w)
|
||||||
|
|
||||||
-- ShortText is already UTF-8 encoded. This is a no-op.
|
-- ShortText is already UTF-8 encoded. This is a no-op.
|
||||||
shortTextToByteArray :: ShortText -> ByteArray
|
shortTextToByteArray :: ShortText -> ByteArray
|
||||||
|
@ -349,4 +364,4 @@ indexChar8Array :: ByteArray -> Int -> Char
|
||||||
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
||||||
|
|
||||||
c2w :: Char -> Word8
|
c2w :: Char -> Word8
|
||||||
c2w = fromIntegral . ord
|
c2w = fromIntegral . ord
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
{-# language GADTSyntax #-}
|
{-# language GADTs #-}
|
||||||
{-# language KindSignatures #-}
|
{-# language KindSignatures #-}
|
||||||
{-# language ScopedTypeVariables #-}
|
{-# language ScopedTypeVariables #-}
|
||||||
{-# language BangPatterns #-}
|
{-# language BangPatterns #-}
|
||||||
{-# language MagicHash #-}
|
{-# language MagicHash #-}
|
||||||
|
{-# language BinaryLiterals #-}
|
||||||
{-# language UnboxedTuples #-}
|
{-# language UnboxedTuples #-}
|
||||||
{-# language RankNTypes #-}
|
{-# language RankNTypes #-}
|
||||||
{-# language LambdaCase #-}
|
{-# language LambdaCase #-}
|
||||||
|
@ -10,9 +11,8 @@
|
||||||
{-# language DataKinds #-}
|
{-# language DataKinds #-}
|
||||||
{-# language TypeApplications #-}
|
{-# language TypeApplications #-}
|
||||||
|
|
||||||
-- | The functions in this module do not check to
|
-- | The functions in this module are explict in the amount of bytes they require.
|
||||||
-- see if there is enough space in the buffer.
|
module Data.ByteArray.Builder.Small.Bounded
|
||||||
module Data.ByteArray.Builder.Small.Unsafe
|
|
||||||
( -- * Builder
|
( -- * Builder
|
||||||
Builder(..)
|
Builder(..)
|
||||||
, construct
|
, construct
|
||||||
|
@ -23,6 +23,11 @@ module Data.ByteArray.Builder.Small.Unsafe
|
||||||
, pasteIO
|
, pasteIO
|
||||||
-- * Combine
|
-- * Combine
|
||||||
, append
|
, append
|
||||||
|
-- * Bounds Manipulation
|
||||||
|
, (<=)
|
||||||
|
, lessThanEqual
|
||||||
|
, isLessThanEqual
|
||||||
|
, raise
|
||||||
-- * Encode Integral Types
|
-- * Encode Integral Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, word64Dec
|
, word64Dec
|
||||||
|
@ -33,6 +38,8 @@ module Data.ByteArray.Builder.Small.Unsafe
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
, word16PaddedUpperHex
|
, word16PaddedUpperHex
|
||||||
, word8PaddedUpperHex
|
, word8PaddedUpperHex
|
||||||
|
, ascii
|
||||||
|
, char
|
||||||
-- ** Machine-Readable
|
-- ** Machine-Readable
|
||||||
, word64BE
|
, word64BE
|
||||||
, word32BE
|
, word32BE
|
||||||
|
@ -53,8 +60,9 @@ import GHC.Word
|
||||||
import GHC.Int
|
import GHC.Int
|
||||||
import Data.Kind
|
import Data.Kind
|
||||||
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
||||||
|
import qualified GHC.TypeLits as GHC
|
||||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||||
import Control.Monad (when)
|
import qualified Control.Category as Cat
|
||||||
|
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
|
|
||||||
|
@ -65,13 +73,16 @@ newtype Builder :: Nat -> Type where
|
||||||
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
||||||
-> Builder n
|
-> Builder n
|
||||||
|
|
||||||
|
knownNat :: KnownNat n => Proxy# n -> Int
|
||||||
|
knownNat p = fromIntegral (natVal' p)
|
||||||
|
|
||||||
-- | Execute the builder. This function is safe.
|
-- | Execute the builder. This function is safe.
|
||||||
run :: forall n. KnownNat n
|
run :: forall n. KnownNat n
|
||||||
=> Builder n -- ^ Builder
|
=> Builder n -- ^ Builder
|
||||||
-> ByteArray
|
-> ByteArray
|
||||||
{-# inline run #-}
|
{-# inline run #-}
|
||||||
run b = runST $ do
|
run b = runST $ do
|
||||||
arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
|
arr <- newByteArray (knownNat (proxy# :: Proxy# n))
|
||||||
len <- pasteST b arr 0
|
len <- pasteST b arr 0
|
||||||
shrinkMutableByteArray arr len
|
shrinkMutableByteArray arr len
|
||||||
unsafeFreezeByteArray arr
|
unsafeFreezeByteArray arr
|
||||||
|
@ -96,7 +107,7 @@ pasteGrowST :: forall n s. KnownNat n
|
||||||
{-# inline pasteGrowST #-}
|
{-# inline pasteGrowST #-}
|
||||||
pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
|
pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
|
||||||
sz0 <- PM.getSizeofMutableByteArray arr0
|
sz0 <- PM.getSizeofMutableByteArray arr0
|
||||||
let req = fromIntegral (natVal' (proxy# :: Proxy# n))
|
let req = knownNat (proxy# :: Proxy# n)
|
||||||
let sz1 = off0 + req
|
let sz1 = off0 + req
|
||||||
if sz1 <= sz0
|
if sz1 <= sz0
|
||||||
then do
|
then do
|
||||||
|
@ -131,6 +142,33 @@ append (Builder f) (Builder g) =
|
||||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||||
(# s1, r #) -> g arr r s1
|
(# s1, r #) -> g arr r s1
|
||||||
|
|
||||||
|
-- | A proof that n is less than or equal to m
|
||||||
|
newtype (n :: Nat) <= (m :: Nat) = LessThanEqual Int -- m - n
|
||||||
|
|
||||||
|
instance Cat.Category (<=) where
|
||||||
|
id = LessThanEqual 0
|
||||||
|
-- b <= c (c - b) -> a <= b (b - a) -> a <= c (c - a)
|
||||||
|
LessThanEqual cb . LessThanEqual ba = LessThanEqual (cb + ba)
|
||||||
|
|
||||||
|
-- | Dynamically check than 'n' is less than or equal to 'm'
|
||||||
|
isLessThanEqual :: (KnownNat n, KnownNat m) => Proxy# n -> Proxy# m -> Maybe (n <= m)
|
||||||
|
isLessThanEqual n m = if 0 <= diff then Just (LessThanEqual diff) else Nothing
|
||||||
|
where diff = knownNat m - knownNat n
|
||||||
|
|
||||||
|
-- | Statically check than 'n' is less than or equal to 'm'. 'n' and 'm' must be known at compile time.
|
||||||
|
lessThanEqual :: forall n m. (KnownNat n, KnownNat m, n GHC.<= m) => n <= m
|
||||||
|
lessThanEqual = LessThanEqual (knownNat (proxy# :: Proxy# m) - knownNat (proxy# :: Proxy# n))
|
||||||
|
|
||||||
|
-- | Weaken the bound on the maximum number of bytes required.
|
||||||
|
-- >>> :{
|
||||||
|
-- buildNumber :: Either Double Word64 -> Builder 32
|
||||||
|
-- buildNumber = \case
|
||||||
|
-- Left d -> doubleDec d
|
||||||
|
-- Right w -> raise lessThanEqual (word64Dec w)
|
||||||
|
-- :}
|
||||||
|
raise :: forall m n. n <= m -> Builder n -> Builder m
|
||||||
|
raise !_ (Builder f) = Builder f
|
||||||
|
|
||||||
-- | Encode a double-floating-point number, using decimal notation or
|
-- | Encode a double-floating-point number, using decimal notation or
|
||||||
-- scientific notation depending on the magnitude. This has undefined
|
-- scientific notation depending on the magnitude. This has undefined
|
||||||
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
||||||
|
@ -302,6 +340,76 @@ word8PaddedUpperHex# w# = construct $ \arr off -> do
|
||||||
where
|
where
|
||||||
w = W# w#
|
w = W# w#
|
||||||
|
|
||||||
|
-- | Encode an ASCII char.
|
||||||
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
|
ascii :: Char -> Builder 1
|
||||||
|
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
|
||||||
|
|
||||||
|
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||||
|
char :: Char -> Builder 4
|
||||||
|
char c
|
||||||
|
| codepoint < 0x80 = construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 codepoint)
|
||||||
|
pure (off + 1)
|
||||||
|
| codepoint < 0x800 = construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
|
||||||
|
return (off + 2)
|
||||||
|
| codepoint >= 0xD800 && codepoint < 0xE000 = construct $ \arr off -> do
|
||||||
|
-- Codepoint U+FFFD
|
||||||
|
writeByteArray arr off (0xEF :: Word8)
|
||||||
|
writeByteArray arr (off + 1) (0xBF :: Word8)
|
||||||
|
writeByteArray arr (off + 2) (0xBD :: Word8)
|
||||||
|
return (off + 3)
|
||||||
|
| codepoint < 0x10000 = construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
|
||||||
|
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
|
||||||
|
return (off + 3)
|
||||||
|
| otherwise = construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
|
||||||
|
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
|
||||||
|
writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint))
|
||||||
|
return (off + 4)
|
||||||
|
|
||||||
|
where
|
||||||
|
codepoint :: Word
|
||||||
|
codepoint = fromIntegral (ord c)
|
||||||
|
|
||||||
|
unsafeWordToWord8 :: Word -> Word8
|
||||||
|
unsafeWordToWord8 (W# w) = W8# w
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x800
|
||||||
|
byteTwoOne :: Word -> Word
|
||||||
|
byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000
|
||||||
|
|
||||||
|
byteTwoTwo :: Word -> Word
|
||||||
|
byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x1000
|
||||||
|
byteThreeOne :: Word -> Word
|
||||||
|
byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000
|
||||||
|
|
||||||
|
byteThreeTwo :: Word -> Word
|
||||||
|
byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
|
||||||
|
|
||||||
|
byteThreeThree :: Word -> Word
|
||||||
|
byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x110000
|
||||||
|
byteFourOne :: Word -> Word
|
||||||
|
byteFourOne w = unsafeShiftR w 18 .|. 0b11110000
|
||||||
|
|
||||||
|
byteFourTwo :: Word -> Word
|
||||||
|
byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000
|
||||||
|
|
||||||
|
byteFourThree :: Word -> Word
|
||||||
|
byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
|
||||||
|
|
||||||
|
byteFourFour :: Word -> Word
|
||||||
|
byteFourFour w = (0b00111111 .&. w) .|. 0b10000000
|
||||||
|
|
||||||
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word64BE :: Word64 -> Builder 8
|
word64BE :: Word64 -> Builder 8
|
Loading…
Reference in a new issue