Switch builder implementation to use chunks (#10)
* switch builder implementation to use chunks * micro-optimize the conditional for builders of length one * use fromBoundedOne for ascii chars
This commit is contained in:
parent
3ad5261ff4
commit
8baf4cc369
6 changed files with 347 additions and 249 deletions
|
@ -35,6 +35,7 @@ expectedSmall :: ByteArray
|
||||||
expectedSmall = Bytes.toByteArray $ Bytes.fromAsciiString
|
expectedSmall = Bytes.toByteArray $ Bytes.fromAsciiString
|
||||||
"((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))"
|
"((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))"
|
||||||
|
|
||||||
|
|
||||||
exampleSmall :: Word16Tree
|
exampleSmall :: Word16Tree
|
||||||
exampleSmall = Branch
|
exampleSmall = Branch
|
||||||
(Branch
|
(Branch
|
||||||
|
|
|
@ -36,6 +36,7 @@ flag checked
|
||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
|
Data.Bytes.Chunks
|
||||||
Data.ByteArray.Builder
|
Data.ByteArray.Builder
|
||||||
Data.ByteArray.Builder.Unsafe
|
Data.ByteArray.Builder.Unsafe
|
||||||
Data.ByteArray.Builder.Bounded
|
Data.ByteArray.Builder.Bounded
|
||||||
|
@ -90,7 +91,7 @@ benchmark bench
|
||||||
, small-bytearray-builder
|
, small-bytearray-builder
|
||||||
, text-short
|
, text-short
|
||||||
, byteslice
|
, byteslice
|
||||||
ghc-options: -Wall -O2
|
ghc-options: -Wall -O2 -ddump-to-file -ddump-simpl -dsuppress-all
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
hs-source-dirs: bench, common
|
hs-source-dirs: bench, common
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
{-# language BangPatterns #-}
|
{-# language BangPatterns #-}
|
||||||
|
{-# language DataKinds #-}
|
||||||
{-# language DuplicateRecordFields #-}
|
{-# language DuplicateRecordFields #-}
|
||||||
{-# language LambdaCase #-}
|
{-# language LambdaCase #-}
|
||||||
{-# language MagicHash #-}
|
{-# language MagicHash #-}
|
||||||
|
@ -9,19 +10,12 @@
|
||||||
module Data.ByteArray.Builder
|
module Data.ByteArray.Builder
|
||||||
( -- * Bounded Primitives
|
( -- * Bounded Primitives
|
||||||
Builder
|
Builder
|
||||||
, construct
|
|
||||||
, fromBounded
|
, fromBounded
|
||||||
-- * Evaluation
|
-- * Evaluation
|
||||||
, run
|
, run
|
||||||
, pasteST
|
|
||||||
, pasteIO
|
|
||||||
, pasteGrowST
|
|
||||||
, pasteGrowIO
|
|
||||||
, pasteArrayST
|
|
||||||
, pasteArrayIO
|
|
||||||
-- * Materialized Byte Sequences
|
-- * Materialized Byte Sequences
|
||||||
, bytes
|
, bytes
|
||||||
, bytearray
|
, byteArray
|
||||||
, shortTextUtf8
|
, shortTextUtf8
|
||||||
, shortTextJsonString
|
, shortTextJsonString
|
||||||
, cstring
|
, cstring
|
||||||
|
@ -77,9 +71,10 @@ module Data.ByteArray.Builder
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Primitive (primitive_)
|
import Control.Monad.Primitive (primitive_)
|
||||||
import Control.Monad.ST (ST,stToIO)
|
import Control.Monad.ST (ST,stToIO,runST)
|
||||||
import Control.Monad.ST.Run (runByteArrayST)
|
import Control.Monad.ST.Run (runByteArrayST)
|
||||||
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
||||||
|
import Data.ByteArray.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
||||||
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
||||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||||
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
||||||
|
@ -90,8 +85,9 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||||
import Data.Text.Short (ShortText)
|
import Data.Text.Short (ShortText)
|
||||||
import Data.Word (Word64,Word32,Word16,Word8)
|
import Data.Word (Word64,Word32,Word16,Word8)
|
||||||
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,RealWorld,(>=#),(/=#))
|
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,RealWorld,(>=#),(/=#))
|
||||||
import GHC.Exts ((+#),(-#))
|
import GHC.Exts (MutableByteArray#,(+#),(-#),(<#))
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
|
import Data.Bytes.Chunks (Chunks(..))
|
||||||
|
|
||||||
import qualified Arithmetic.Nat as Nat
|
import qualified Arithmetic.Nat as Nat
|
||||||
import qualified Arithmetic.Types as Arithmetic
|
import qualified Arithmetic.Types as Arithmetic
|
||||||
|
@ -102,105 +98,31 @@ import qualified Data.Vector as V
|
||||||
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
||||||
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
|
|
||||||
-- | Run a builder. An accurate size hint is important for
|
-- | Run a builder.
|
||||||
-- good performance. The size hint should be slightly greater
|
|
||||||
-- than or equal to the actual size.
|
|
||||||
run ::
|
run ::
|
||||||
Int -- ^ Hint for upper bound on size
|
Int -- ^ Size of initial chunk (use 4080 if uncertain)
|
||||||
-> Builder -- ^ Builder
|
-> Builder -- ^ Builder
|
||||||
-> ByteArray
|
-> Chunks
|
||||||
run hint b = runByteArrayST $ do
|
run hint@(I# hint# ) (Builder f) = runST $ do
|
||||||
let go !n = do
|
MutableByteArray buf0 <- PM.newByteArray hint
|
||||||
arr <- PM.newByteArray n
|
cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of
|
||||||
pasteST b (MutableBytes arr 0 n) >>= \case
|
(# s1, bufX, offX, _, csX #) ->
|
||||||
Nothing -> go (n + n + 16)
|
(# s1, Mutable bufX offX csX #)
|
||||||
Just len -> do
|
commitsOntoChunks ChunksNil cs
|
||||||
shrinkMutableByteArray arr len
|
|
||||||
PM.unsafeFreezeByteArray arr
|
|
||||||
go (max hint 16)
|
|
||||||
|
|
||||||
-- | Variant of 'pasteArrayST' that runs in 'IO'.
|
-- Internal. This freezes all the mutable byte arrays in-place,
|
||||||
pasteArrayIO ::
|
-- so be careful. It also reverses the chunks since everything
|
||||||
MutableBytes RealWorld -- ^ Buffer
|
-- is backwards.
|
||||||
-> (a -> Builder) -- ^ Builder
|
commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
|
||||||
-> V.Vector a -- ^ Elements to serialize
|
commitsOntoChunks !xs Initial = pure xs
|
||||||
-> IO (V.Vector a, MutableBytes RealWorld) -- ^ Shifted vector, shifted buffer
|
commitsOntoChunks !xs (Immutable arr off len cs) =
|
||||||
pasteArrayIO !arr f !xs = stToIO (pasteArrayST arr f xs)
|
commitsOntoChunks (ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs) cs
|
||||||
|
commitsOntoChunks !xs (Mutable buf len cs) = case len of
|
||||||
-- | Fold over a vector, applying the builder to each element until
|
0# -> commitsOntoChunks xs cs
|
||||||
-- the buffer cannot accomodate any more.
|
_ -> do
|
||||||
pasteArrayST ::
|
shrinkMutableByteArray (MutableByteArray buf) (I# len)
|
||||||
MutableBytes s -- ^ Buffer
|
arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
|
||||||
-> (a -> Builder) -- ^ Builder
|
commitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs
|
||||||
-> V.Vector a -- ^ Elements to serialize
|
|
||||||
-> ST s (V.Vector a, MutableBytes s) -- ^ Shifted vector, shifted buffer
|
|
||||||
pasteArrayST (MutableBytes arr off0 len0) f !xs0 = do
|
|
||||||
let go !xs !ixBufA !lenBufA = if V.length xs > 0
|
|
||||||
then do
|
|
||||||
let a = V.unsafeHead xs
|
|
||||||
pasteST (f a) (MutableBytes arr ixBufA lenBufA) >>= \case
|
|
||||||
Nothing -> pure (xs,MutableBytes arr ixBufA lenBufA)
|
|
||||||
Just ixBufB ->
|
|
||||||
go (V.unsafeTail xs) ixBufB (lenBufA + (ixBufA - ixBufB))
|
|
||||||
else pure (xs,MutableBytes arr ixBufA lenBufA)
|
|
||||||
go xs0 off0 len0
|
|
||||||
|
|
||||||
-- | Paste the builder into the byte array starting at offset zero.
|
|
||||||
-- This repeatedly reallocates the byte array if it cannot accomodate
|
|
||||||
-- the builder, replaying the builder each time.
|
|
||||||
pasteGrowST ::
|
|
||||||
Int -- ^ How many bytes to grow by at a time
|
|
||||||
-> Builder
|
|
||||||
-> MutableByteArrayOffset s
|
|
||||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
|
||||||
-> ST s (MutableByteArrayOffset s)
|
|
||||||
-- ^ Final buffer that accomodated the builder.
|
|
||||||
pasteGrowST !n b !(MutableByteArrayOffset arr0 off0) = do
|
|
||||||
let go !arr !sz = pasteST b (MutableBytes arr off0 (sz - off0)) >>= \case
|
|
||||||
Nothing -> do
|
|
||||||
let szNext = sz + n
|
|
||||||
arrNext <- PM.resizeMutableByteArray arr szNext
|
|
||||||
go arrNext szNext
|
|
||||||
Just ix -> pure (MutableByteArrayOffset{array=arr,offset=ix})
|
|
||||||
go arr0 =<< PM.getSizeofMutableByteArray arr0
|
|
||||||
|
|
||||||
-- | Variant of 'pasteGrowST' that runs in 'IO'.
|
|
||||||
pasteGrowIO ::
|
|
||||||
Int -- ^ How many bytes to grow by at a time
|
|
||||||
-> Builder
|
|
||||||
-> MutableByteArrayOffset RealWorld
|
|
||||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
|
||||||
-> IO (MutableByteArrayOffset RealWorld)
|
|
||||||
-- ^ Final buffer that accomodated the builder.
|
|
||||||
pasteGrowIO !n b !arr = stToIO (pasteGrowST n b arr)
|
|
||||||
|
|
||||||
-- | Execute the builder, pasting its contents into a buffer.
|
|
||||||
-- If the buffer is not large enough, this returns 'Nothing'.
|
|
||||||
-- Otherwise, it returns the index in the buffer that follows
|
|
||||||
-- the payload just written.
|
|
||||||
pasteST :: Builder -> MutableBytes s -> ST s (Maybe Int)
|
|
||||||
{-# inline pasteST #-}
|
|
||||||
pasteST (Builder f) (MutableBytes (MutableByteArray arr) (I# off) (I# len)) =
|
|
||||||
ST $ \s0 -> case f arr off len s0 of
|
|
||||||
(# s1, r #) -> if Exts.isTrue# (r /=# (-1#))
|
|
||||||
then (# s1, Just (I# r) #)
|
|
||||||
else (# s1, Nothing #)
|
|
||||||
|
|
||||||
-- | Variant of 'pasteST' that runs in 'IO'.
|
|
||||||
pasteIO :: Builder -> MutableBytes RealWorld -> IO (Maybe Int)
|
|
||||||
{-# inline pasteIO #-}
|
|
||||||
pasteIO b m = stToIO (pasteST b m)
|
|
||||||
|
|
||||||
-- | Constructor for 'Builder' that works on a function with lifted
|
|
||||||
-- arguments instead of unlifted ones. This is just as unsafe as the
|
|
||||||
-- actual constructor.
|
|
||||||
construct :: (forall s. MutableBytes s -> ST s (Maybe Int)) -> Builder
|
|
||||||
construct f = Builder
|
|
||||||
$ \arr off len s0 ->
|
|
||||||
case unST (f (MutableBytes (MutableByteArray arr) (I# off) (I# len))) s0 of
|
|
||||||
(# s1, m #) -> case m of
|
|
||||||
Nothing -> (# s1, (-1#) #)
|
|
||||||
Just (I# n) -> (# s1, n #)
|
|
||||||
|
|
||||||
-- | Convert a bounded builder to an unbounded one. If the size
|
-- | Convert a bounded builder to an unbounded one. If the size
|
||||||
-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
|
-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
|
||||||
|
@ -210,23 +132,51 @@ fromBounded ::
|
||||||
-> Bounded.Builder n
|
-> Bounded.Builder n
|
||||||
-> Builder
|
-> Builder
|
||||||
{-# inline fromBounded #-}
|
{-# inline fromBounded #-}
|
||||||
fromBounded n (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
|
fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
let !(I# req) = Nat.demote n in
|
let !(I# req) = Nat.demote n
|
||||||
case len >=# req of
|
!(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||||
1# -> f arr off s0
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
_ -> (# s0, (-1#) #)
|
_ -> let !(I# lenX) = max 4080 (I# req) in
|
||||||
|
case Exts.newByteArray# lenX s0 of
|
||||||
|
(# sX, bufX #) ->
|
||||||
|
(# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||||
|
in case f buf1 off1 s1 of
|
||||||
|
(# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||||
|
|
||||||
|
-- This is a micro-optimization that uses an equality check instead
|
||||||
|
-- of an inequality check when the required number of bytes is one.
|
||||||
|
-- Use this instead of fromBounded (where possible) leads to marginally
|
||||||
|
-- better results in benchmarks.
|
||||||
|
fromBoundedOne ::
|
||||||
|
Bounded.Builder 1
|
||||||
|
-> Builder
|
||||||
|
{-# inline fromBoundedOne #-}
|
||||||
|
fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 of
|
||||||
|
0# -> case Exts.newByteArray# 4080# s0 of
|
||||||
|
(# sX, bufX #) ->
|
||||||
|
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||||
|
_ -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
in case f buf1 off1 s1 of
|
||||||
|
(# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||||
|
|
||||||
-- | Create a builder from an unsliced byte sequence.
|
-- | Create a builder from an unsliced byte sequence.
|
||||||
bytearray :: ByteArray -> Builder
|
byteArray :: ByteArray -> Builder
|
||||||
bytearray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
||||||
|
|
||||||
-- | Create a builder from a sliced byte sequence.
|
-- | Create a builder from a sliced byte sequence.
|
||||||
bytes :: Bytes -> Builder
|
bytes :: Bytes -> Builder
|
||||||
bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len >= slen
|
bytes (Bytes (ByteArray src# ) (I# soff# ) slen@(I# slen# )) = Builder
|
||||||
then do
|
(\buf0 off0 len0 cs0 s0 -> if slen >= 1024
|
||||||
PM.copyByteArray arr off src soff slen
|
then case Exts.newByteArray# 0# s0 of
|
||||||
pure (Just (off + slen))
|
(# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #)
|
||||||
else pure Nothing
|
else case len0 <# slen# of
|
||||||
|
1# -> case Exts.newByteArray# 4080# s0 of
|
||||||
|
(# s1, buf1 #) -> case Exts.copyByteArray# src# soff# buf1 0# slen# s1 of
|
||||||
|
s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #)
|
||||||
|
_ -> let !s1 = Exts.copyByteArray# src# soff# buf0 off0 slen# s0 in
|
||||||
|
(# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #)
|
||||||
|
)
|
||||||
|
|
||||||
-- | Create a builder from a slice of an array of 'Word8'. There is the same
|
-- | Create a builder from a slice of an array of 'Word8'. There is the same
|
||||||
-- as 'bytes' but is provided as a convenience for users working with different
|
-- as 'bytes' but is provided as a convenience for users working with different
|
||||||
|
@ -238,10 +188,7 @@ word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len)
|
||||||
-- byte sequence is UTF-8 encoded text.
|
-- byte sequence is UTF-8 encoded text.
|
||||||
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
||||||
{-# inline slicedUtf8TextJson #-}
|
{-# inline slicedUtf8TextJson #-}
|
||||||
slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0 dlen0) ->
|
slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction reqLen $ \dst doff0 -> do
|
||||||
let slen0 = I# slen0#
|
|
||||||
in if dlen0 > (2 * slen0) + 2
|
|
||||||
then do
|
|
||||||
PM.writeByteArray dst doff0 (c2w '"')
|
PM.writeByteArray dst doff0 (c2w '"')
|
||||||
let go !soff !slen !doff = if slen > 0
|
let go !soff !slen !doff = if slen > 0
|
||||||
then case indexChar8Array (ByteArray src#) soff of
|
then case indexChar8Array (ByteArray src#) soff of
|
||||||
|
@ -261,8 +208,24 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
|
||||||
else pure doff
|
else pure doff
|
||||||
doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
|
doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
|
||||||
PM.writeByteArray dst doffRes (c2w '"')
|
PM.writeByteArray dst doffRes (c2w '"')
|
||||||
pure (Just (doffRes + 1))
|
pure (doffRes + 1)
|
||||||
else pure Nothing
|
where
|
||||||
|
slen0 = I# slen0#
|
||||||
|
reqLen = (2 * slen0) + 2
|
||||||
|
|
||||||
|
-- | Constructor for 'Builder' that works on a function with lifted
|
||||||
|
-- arguments instead of unlifted ones. This is just as unsafe as the
|
||||||
|
-- actual constructor.
|
||||||
|
fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder
|
||||||
|
fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||||
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
_ -> let !(I# lenX) = max 4080 (I# req) in
|
||||||
|
case Exts.newByteArray# lenX s0 of
|
||||||
|
(# sX, bufX #) ->
|
||||||
|
(# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||||
|
in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
|
||||||
|
(# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||||
|
|
||||||
-- Internal. Write two characters in the ASCII plane to a byte array.
|
-- Internal. Write two characters in the ASCII plane to a byte array.
|
||||||
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
|
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
|
||||||
|
@ -420,7 +383,7 @@ word8PaddedUpperHex w =
|
||||||
-- | Encode an ASCII char.
|
-- | Encode an ASCII char.
|
||||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
ascii :: Char -> Builder
|
ascii :: Char -> Builder
|
||||||
ascii c = fromBounded Nat.constant (Bounded.ascii c)
|
ascii c = fromBoundedOne (Bounded.ascii c)
|
||||||
|
|
||||||
-- | Encode an UTF8 char. This only uses as much space as is required.
|
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||||
char :: Char -> Builder
|
char :: Char -> Builder
|
||||||
|
@ -475,7 +438,7 @@ word16BE w = fromBounded Nat.constant (Bounded.word16BE w)
|
||||||
|
|
||||||
-- | Requires exactly 1 byte.
|
-- | Requires exactly 1 byte.
|
||||||
word8 :: Word8 -> Builder
|
word8 :: Word8 -> Builder
|
||||||
word8 w = fromBounded Nat.constant (Bounded.word8 w)
|
word8 w = fromBoundedOne (Bounded.word8 w)
|
||||||
|
|
||||||
-- | Prefix a builder with its size in bytes. This size is
|
-- | Prefix a builder with its size in bytes. This size is
|
||||||
-- presented as a big-endian 32-bit word. The need to prefix
|
-- presented as a big-endian 32-bit word. The need to prefix
|
||||||
|
@ -490,35 +453,54 @@ word8 w = fromBounded Nat.constant (Bounded.word8 w)
|
||||||
-- However, using 'consLength32BE' is much more efficient here
|
-- However, using 'consLength32BE' is much more efficient here
|
||||||
-- since it only materializes the 'ByteArray' once.
|
-- since it only materializes the 'ByteArray' once.
|
||||||
consLength32BE :: Builder -> Builder
|
consLength32BE :: Builder -> Builder
|
||||||
consLength32BE (Builder f) = Builder $ \arr off len s0 -> case len >=# 4# of
|
consLength32BE (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
1# -> case f arr (off +# 4# ) (len -# 4# ) s0 of
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# 4# of
|
||||||
(# s1, r #) -> case r of
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
(-1#) -> (# s1, (-1#) #)
|
_ -> case Exts.newByteArray# 4080# s0 of
|
||||||
_ ->
|
(# sX, bufX #) ->
|
||||||
let ST g = UnsafeBounded.pasteST
|
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||||
(Bounded.word32BE (fromIntegral ((I# r - I# off) - 4)))
|
in case f buf1 (off1 +# 4# ) (len1 -# 4# ) cs1 s1 of
|
||||||
(MutableByteArray arr)
|
(# s2, buf2, off2, len2, cs2 #) ->
|
||||||
(I# off)
|
let !dist = case Exts.sameMutableByteArray# buf1 buf2 of
|
||||||
in case g s1 of
|
1# -> off2 -# off1
|
||||||
(# s2, _ #) -> (# s2, r #)
|
_ -> commitDistance buf1 off2 cs2 -# off1
|
||||||
_ -> (# s0, (-1#) #)
|
ST g = UnsafeBounded.pasteST
|
||||||
|
(Bounded.word32BE (fromIntegral (I# (dist -# 4# ))))
|
||||||
|
(MutableByteArray buf1)
|
||||||
|
(I# off1)
|
||||||
|
in case g s2 of
|
||||||
|
(# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #)
|
||||||
|
|
||||||
|
|
||||||
-- | Prefix a builder with its size in bytes. This size is
|
-- | Prefix a builder with its size in bytes. This size is
|
||||||
-- presented as a big-endian 64-bit word. See 'consLength32BE'.
|
-- presented as a big-endian 64-bit word. See 'consLength32BE'.
|
||||||
consLength64BE :: Builder -> Builder
|
consLength64BE :: Builder -> Builder
|
||||||
consLength64BE (Builder f) = Builder $ \arr off len s0 -> case len >=# 8# of
|
consLength64BE (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
1# -> case f arr (off +# 8# ) (len -# 8# ) s0 of
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# 8# of
|
||||||
(# s1, r #) -> case r of
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
(-1#) -> (# s1, (-1#) #)
|
_ -> case Exts.newByteArray# 4080# s0 of
|
||||||
_ ->
|
(# sX, bufX #) ->
|
||||||
let ST g = UnsafeBounded.pasteST
|
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||||
(Bounded.word64BE (fromIntegral ((I# r - I# off) - 8)))
|
in case f buf1 (off1 +# 8# ) (len1 -# 8# ) cs1 s1 of
|
||||||
(MutableByteArray arr)
|
(# s2, buf2, off2, len2, cs2 #) ->
|
||||||
(I# off)
|
let !dist = case Exts.sameMutableByteArray# buf1 buf2 of
|
||||||
in case g s1 of
|
1# -> off2 -# off1
|
||||||
(# s2, _ #) -> (# s2, r #)
|
_ -> commitDistance buf1 off2 cs2 -# off1
|
||||||
_ -> (# s0, (-1#) #)
|
ST g = UnsafeBounded.pasteST
|
||||||
|
(Bounded.word64BE (fromIntegral (I# (dist -# 8# ))))
|
||||||
|
(MutableByteArray buf1)
|
||||||
|
(I# off1)
|
||||||
|
in case g s2 of
|
||||||
|
(# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #)
|
||||||
|
|
||||||
|
commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int#
|
||||||
|
commitDistance _ !_ Initial = error "chunkDistance: chunk not found"
|
||||||
|
commitDistance target !n (Immutable _ _ len cs) =
|
||||||
|
commitDistance target (n +# len) cs
|
||||||
|
commitDistance target !n (Mutable buf len cs) =
|
||||||
|
case Exts.sameMutableByteArray# target buf of
|
||||||
|
1# -> n +# len
|
||||||
|
_ -> commitDistance target (n +# len) cs
|
||||||
|
|
||||||
-- ShortText is already UTF-8 encoded. This is a no-op.
|
-- ShortText is already UTF-8 encoded. This is a no-op.
|
||||||
shortTextToByteArray :: ShortText -> ByteArray
|
shortTextToByteArray :: ShortText -> ByteArray
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
module Data.ByteArray.Builder.Unsafe
|
module Data.ByteArray.Builder.Unsafe
|
||||||
( -- * Types
|
( -- * Types
|
||||||
Builder(..)
|
Builder(..)
|
||||||
|
, Commits(..)
|
||||||
-- * Safe Functions
|
-- * Safe Functions
|
||||||
-- | These functions are actually completely safe, but they are defined
|
-- | These functions are actually completely safe, but they are defined
|
||||||
-- here because they are used by typeclass instances. Import them from
|
-- here because they are used by typeclass instances. Import them from
|
||||||
|
@ -17,10 +18,10 @@ module Data.ByteArray.Builder.Unsafe
|
||||||
, cstring
|
, cstring
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Primitive (MutableByteArray(MutableByteArray))
|
import Data.Primitive (MutableByteArray(MutableByteArray),ByteArray)
|
||||||
import Foreign.C.String (CString)
|
import Foreign.C.String (CString)
|
||||||
import GHC.Exts ((-#),(+#),(/=#),(>#))
|
import GHC.Exts ((-#),(+#),(>#))
|
||||||
import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
|
import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr))
|
||||||
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
|
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
||||||
|
@ -32,10 +33,14 @@ import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
-- | An unmaterialized sequence of bytes that may be pasted
|
-- | An unmaterialized sequence of bytes that may be pasted
|
||||||
-- into a mutable byte array.
|
-- into a mutable byte array.
|
||||||
newtype Builder
|
newtype Builder
|
||||||
= Builder (forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
|
= Builder (forall s.
|
||||||
-- ^ This function takes a buffer, an offset, and a number of remaining bytes.
|
MutableByteArray# s -> -- buffer we are currently writing to
|
||||||
-- It returns the new offset (should be greater than the old offset), or if
|
Int# -> -- offset into the current buffer
|
||||||
-- there was not enough space left in buffer, it returns -1.
|
Int# -> -- number of bytes remaining in the current buffer
|
||||||
|
Commits s -> -- buffers and immutable byte slices that we have already committed
|
||||||
|
State# s ->
|
||||||
|
(# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things
|
||||||
|
)
|
||||||
|
|
||||||
instance IsString Builder where
|
instance IsString Builder where
|
||||||
{-# inline fromString #-}
|
{-# inline fromString #-}
|
||||||
|
@ -43,53 +48,73 @@ instance IsString Builder where
|
||||||
|
|
||||||
instance Semigroup Builder where
|
instance Semigroup Builder where
|
||||||
{-# inline (<>) #-}
|
{-# inline (<>) #-}
|
||||||
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
|
Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of
|
||||||
(# s1, r #) -> case r /=# (-1#) of
|
(# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1
|
||||||
1# -> g arr r (len0 +# (off0 -# r)) s1
|
|
||||||
_ -> (# s1, (-1#) #)
|
|
||||||
|
|
||||||
instance Monoid Builder where
|
instance Monoid Builder where
|
||||||
{-# inline mempty #-}
|
{-# inline mempty #-}
|
||||||
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
|
||||||
|
data Commits s
|
||||||
|
= Mutable
|
||||||
|
(MutableByteArray# s)
|
||||||
|
-- ^ Mutable buffer, start index implicitly zero
|
||||||
|
Int# -- ^ Length (may be smaller than actual length)
|
||||||
|
!(Commits s)
|
||||||
|
| Immutable
|
||||||
|
ByteArray# -- ^ Immutable chunk
|
||||||
|
Int# -- ^ Offset into chunk, not necessarily zero
|
||||||
|
Int# -- ^ Length (may be smaller than actual length)
|
||||||
|
!(Commits s)
|
||||||
|
| Initial
|
||||||
|
|
||||||
-- | Create a builder from a cons-list of 'Char'. These
|
-- | Create a builder from a cons-list of 'Char'. These
|
||||||
-- are be UTF-8 encoded.
|
-- are be UTF-8 encoded.
|
||||||
stringUtf8 :: String -> Builder
|
stringUtf8 :: String -> Builder
|
||||||
{-# inline stringUtf8 #-}
|
{-# inline stringUtf8 #-}
|
||||||
stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
|
stringUtf8 cs = Builder (goString cs)
|
||||||
|
|
||||||
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
|
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
|
||||||
-- textual encoding, copying bytes until @NUL@ is reached.
|
-- textual encoding, copying bytes until @NUL@ is reached.
|
||||||
cstring :: CString -> Builder
|
cstring :: CString -> Builder
|
||||||
{-# inline cstring #-}
|
{-# inline cstring #-}
|
||||||
cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
|
cstring (Ptr cs) = Builder (goCString cs)
|
||||||
|
|
||||||
goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
goString :: String
|
||||||
|
-> MutableByteArray# s -> Int# -> Int# -> Commits s
|
||||||
|
-> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
|
||||||
{-# noinline goString #-}
|
{-# noinline goString #-}
|
||||||
goString [] _ off0 _ s0 = (# s0, off0 #)
|
goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #)
|
||||||
goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
|
goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of
|
||||||
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
|
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of
|
||||||
(# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
|
(# s1, I# off1 #) -> goString cs buf0 off1 (len0 -# (off1 -# off0)) cs0 s1
|
||||||
_ -> (# s0, (-1#) #)
|
_ -> case Exts.newByteArray# 4080# s0 of
|
||||||
|
(# s1, buf1 #) -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf1) 0) s1 of
|
||||||
|
(# s2, I# off1 #) -> goString cs buf1 off1 (4080# -# off1) (Mutable buf0 off0 cs0) s2
|
||||||
|
|
||||||
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
|
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
|
||||||
-- since GHC uses a different function based on whether or not non-ASCII
|
-- since GHC uses a different function based on whether or not non-ASCII
|
||||||
-- codepoints are used in the string.
|
-- codepoints are used in the string.
|
||||||
{-# RULES
|
{-# RULES
|
||||||
"Builder stringUtf8/cstring" forall s a b c d.
|
"Builder stringUtf8/cstring" forall s a b c d e.
|
||||||
goString (unpackCString# s) a b c d = goCString s a b c d
|
goString (unpackCString# s) a b c d e = goCString s a b c d e
|
||||||
"Builder stringUtf8/cstring-utf8" forall s a b c d.
|
"Builder stringUtf8/cstring-utf8" forall s a b c d e.
|
||||||
goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
|
goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e
|
||||||
#-}
|
#-}
|
||||||
|
|
||||||
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s
|
||||||
goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
|
-> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
|
||||||
0## -> (# s0, off0 #)
|
goCString addr buf0 off0 len0 cs0 s0 = case Exts.indexWord8OffAddr# addr 0# of
|
||||||
|
0## -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
w -> case len0 of
|
w -> case len0 of
|
||||||
0# -> (# s0, (-1#) #)
|
0# -> case Exts.newByteArray# 4080# s0 of
|
||||||
_ -> case Exts.writeWord8Array# buf off0 w s0 of
|
(# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# w s1 of
|
||||||
s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
|
s2 -> goCString
|
||||||
|
(Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# )
|
||||||
|
(Mutable buf0 off0 cs0)
|
||||||
|
s2
|
||||||
|
_ -> case Exts.writeWord8Array# buf0 off0 w s0 of
|
||||||
|
s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1
|
||||||
|
|
||||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||||
unST (ST f) = f
|
unST (ST f) = f
|
||||||
|
|
||||||
|
|
76
src/Data/Bytes/Chunks.hs
Normal file
76
src/Data/Bytes/Chunks.hs
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
{-# language BangPatterns #-}
|
||||||
|
{-# language DerivingStrategies #-}
|
||||||
|
{-# language TypeFamilies #-}
|
||||||
|
{-# language MagicHash #-}
|
||||||
|
{-# language UnboxedTuples #-}
|
||||||
|
{-# language NamedFieldPuns #-}
|
||||||
|
|
||||||
|
module Data.Bytes.Chunks
|
||||||
|
( Chunks(..)
|
||||||
|
, concat
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Prelude hiding (length,concat)
|
||||||
|
|
||||||
|
import GHC.ST (ST(..))
|
||||||
|
import Data.Bytes.Types (Bytes(..))
|
||||||
|
import Data.Primitive (ByteArray(..),MutableByteArray(..))
|
||||||
|
import GHC.Exts (ByteArray#,MutableByteArray#)
|
||||||
|
import GHC.Exts (IsList,Int#,State#,Int(I#),(+#),(-#))
|
||||||
|
import Control.Monad.ST.Run (runByteArrayST)
|
||||||
|
|
||||||
|
import qualified GHC.Exts as Exts
|
||||||
|
import qualified Data.Primitive as PM
|
||||||
|
|
||||||
|
data Chunks
|
||||||
|
= ChunksCons {-# UNPACK #-} !Bytes !Chunks
|
||||||
|
| ChunksNil
|
||||||
|
|
||||||
|
concat :: Chunks -> ByteArray
|
||||||
|
concat x = ByteArray (concat# x)
|
||||||
|
|
||||||
|
concat# :: Chunks -> ByteArray#
|
||||||
|
{-# noinline concat# #-}
|
||||||
|
concat# ChunksNil = case mempty of {ByteArray x -> x}
|
||||||
|
concat# (ChunksCons (Bytes{array=c,offset=coff,length=szc}) cs) = case cs of
|
||||||
|
ChunksNil -> case c of {ByteArray x -> x}
|
||||||
|
ChunksCons (Bytes{array=d,offset=doff,length=szd}) ds ->
|
||||||
|
unBa $ runByteArrayST $ do
|
||||||
|
let szboth = szc + szd
|
||||||
|
len = chunksLengthGo szboth ds
|
||||||
|
dst <- PM.newByteArray len
|
||||||
|
PM.copyByteArray dst 0 c coff szc
|
||||||
|
PM.copyByteArray dst szc d doff szd
|
||||||
|
_ <- copy dst szboth ds
|
||||||
|
PM.unsafeFreezeByteArray dst
|
||||||
|
|
||||||
|
chunksLengthGo :: Int -> Chunks -> Int
|
||||||
|
chunksLengthGo !n ChunksNil = n
|
||||||
|
chunksLengthGo !n (ChunksCons (Bytes{length}) cs) =
|
||||||
|
chunksLengthGo (n + length) cs
|
||||||
|
|
||||||
|
-- | Copy the contents of the chunks into a mutable array.
|
||||||
|
-- Precondition: The destination must have enough space to
|
||||||
|
-- house the contents. This is not checked.
|
||||||
|
copy ::
|
||||||
|
MutableByteArray s -- ^ Destination
|
||||||
|
-> Int -- ^ Destination offset
|
||||||
|
-> Chunks -- ^ Source
|
||||||
|
-> ST s Int -- ^ Returns the next index into the destination after the payload
|
||||||
|
{-# inline copy #-}
|
||||||
|
copy (MutableByteArray dst) (I# off) cs = ST
|
||||||
|
(\s0 -> case copy# dst off cs s0 of
|
||||||
|
(# s1, nextOff #) -> (# s1, I# nextOff #)
|
||||||
|
)
|
||||||
|
|
||||||
|
copy# :: MutableByteArray# s -> Int# -> Chunks -> State# s -> (# State# s, Int# #)
|
||||||
|
copy# _ off ChunksNil s0 = (# s0, off #)
|
||||||
|
copy# marr off (ChunksCons (Bytes{array,offset,length}) cs) s0 =
|
||||||
|
case Exts.copyByteArray# (unBa array) (unI offset) marr off (unI length) s0 of
|
||||||
|
s1 -> copy# marr (off +# unI length) cs s1
|
||||||
|
|
||||||
|
unI :: Int -> Int#
|
||||||
|
unI (I# i) = i
|
||||||
|
|
||||||
|
unBa :: ByteArray -> ByteArray#
|
||||||
|
unBa (ByteArray x) = x
|
109
test/Main.hs
109
test/Main.hs
|
@ -18,6 +18,7 @@ import qualified Arithmetic.Nat as Nat
|
||||||
import qualified Data.ByteString as ByteString
|
import qualified Data.ByteString as ByteString
|
||||||
import qualified Data.ByteString.Builder as BB
|
import qualified Data.ByteString.Builder as BB
|
||||||
import qualified Data.ByteString.Lazy.Char8 as LB
|
import qualified Data.ByteString.Lazy.Char8 as LB
|
||||||
|
import qualified Data.Bytes.Chunks as Chunks
|
||||||
import qualified Data.List as L
|
import qualified Data.List as L
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
@ -37,79 +38,99 @@ tests :: TestTree
|
||||||
tests = testGroup "Tests"
|
tests = testGroup "Tests"
|
||||||
[ testGroup "live"
|
[ testGroup "live"
|
||||||
[ TQC.testProperty "word64Dec" $ \w ->
|
[ TQC.testProperty "word64Dec" $ \w ->
|
||||||
run 1 (word64Dec w) === pack (show w)
|
runConcat 1 (word64Dec w) === pack (show w)
|
||||||
, TQC.testProperty "word64Dec-x3" $ \x y z ->
|
, TQC.testProperty "word64Dec-x3" $ \x y z ->
|
||||||
run 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
||||||
===
|
===
|
||||||
pack (show x ++ show y ++ show z)
|
pack (show x ++ show y ++ show z)
|
||||||
, TQC.testProperty "int64Dec-x3" $ \x y z ->
|
, TQC.testProperty "int64Dec-x3" $ \x y z ->
|
||||||
run 1 (int64Dec x <> int64Dec y <> int64Dec z)
|
runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z)
|
||||||
===
|
===
|
||||||
pack (show x ++ show y ++ show z)
|
pack (show x ++ show y ++ show z)
|
||||||
, TQC.testProperty "word64BE-x3" $ \x y z ->
|
, TQC.testProperty "word64BE-x3" $ \x y z ->
|
||||||
run 1 (word64BE x <> word64BE y <> word64BE z)
|
runConcat 1 (word64BE x <> word64BE y <> word64BE z)
|
||||||
===
|
===
|
||||||
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
|
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
|
||||||
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
||||||
run 1 (word64PaddedUpperHex w)
|
runConcat 1 (word64PaddedUpperHex w)
|
||||||
===
|
===
|
||||||
pack (showWord64PaddedUpperHex w)
|
pack (showWord64PaddedUpperHex w)
|
||||||
, TQC.testProperty "word8Dec" $ \w ->
|
, TQC.testProperty "word8Dec" $ \w ->
|
||||||
run 1 (word8Dec w)
|
runConcat 1 (word8Dec w)
|
||||||
===
|
===
|
||||||
pack (show w)
|
pack (show w)
|
||||||
, TQC.testProperty "consLength32BE" $ \w ->
|
, TQC.testProperty "consLength32BE" $ \w ->
|
||||||
run 1 (consLength32BE (word8Dec w))
|
runConcat 1 (consLength32BE (word8Dec w))
|
||||||
===
|
===
|
||||||
pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
||||||
, TQC.testProperty "consLength64BE" $ \w ->
|
, TQC.testProperty "consLength64BE" $ \w ->
|
||||||
run 1 (consLength64BE (word16Dec w))
|
pack
|
||||||
|
( '\x00' : '\x00' : '\x00' : '\x00'
|
||||||
|
: '\x00' : '\x00' : '\x00' : chr (L.length (show w))
|
||||||
|
: show w
|
||||||
|
)
|
||||||
===
|
===
|
||||||
pack ('\x00' : '\x00' : '\x00' : '\x00' : '\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
runConcat 1 (consLength64BE (word16Dec w))
|
||||||
, TQC.testProperty "pasteArrayST" $ \(xs :: [Word64]) ->
|
|
||||||
(runArray word64Dec (V.fromList xs))
|
|
||||||
===
|
|
||||||
pack (foldMap show xs)
|
|
||||||
, THU.testCase "stringUtf8" $
|
, THU.testCase "stringUtf8" $
|
||||||
packUtf8 "¿Cómo estás? I am doing well." @=?
|
packUtf8 "¿Cómo estás? I am doing well." @=?
|
||||||
run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
||||||
, THU.testCase "doubleDec-A" $
|
, THU.testCase "doubleDec-A" $
|
||||||
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
|
pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0)
|
||||||
, THU.testCase "doubleDec-B" $
|
, THU.testCase "doubleDec-B" $
|
||||||
pack (show (2.5 :: Double)) @=? run 1 (doubleDec 2.5)
|
pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5)
|
||||||
, THU.testCase "doubleDec-C" $
|
, THU.testCase "doubleDec-C" $
|
||||||
pack ("1e+15") @=? run 1 (doubleDec 1e15)
|
pack ("1e+15") @=? runConcat 1 (doubleDec 1e15)
|
||||||
, THU.testCase "doubleDec-D" $
|
, THU.testCase "doubleDec-D" $
|
||||||
pack ("-42") @=? run 1 (doubleDec (-42))
|
pack ("-42") @=? runConcat 1 (doubleDec (-42))
|
||||||
, THU.testCase "doubleDec-E" $
|
, THU.testCase "doubleDec-E" $
|
||||||
pack ("-8.88888888888888e+14") @=? run 1 (doubleDec (-888888888888888.8888888))
|
pack ("-8.88888888888888e+14") @=? runConcat 1 (doubleDec (-888888888888888.8888888))
|
||||||
, THU.testCase "doubleDec-F" $
|
, THU.testCase "doubleDec-F" $
|
||||||
pack ("42") @=? run 1 (doubleDec 42)
|
pack ("42") @=? runConcat 1 (doubleDec 42)
|
||||||
, THU.testCase "doubleDec-G" $
|
, THU.testCase "doubleDec-G" $
|
||||||
pack ("0") @=? run 1 (doubleDec 0)
|
pack ("0") @=? runConcat 1 (doubleDec 0)
|
||||||
, THU.testCase "doubleDec-H" $
|
, THU.testCase "doubleDec-H" $
|
||||||
pack ("0.5") @=? run 1 (doubleDec 0.5)
|
pack ("0.5") @=? runConcat 1 (doubleDec 0.5)
|
||||||
, THU.testCase "doubleDec-I" $
|
, THU.testCase "doubleDec-I" $
|
||||||
pack ("-0.5") @=? run 1 (doubleDec (-0.5))
|
pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5))
|
||||||
, THU.testCase "doubleDec-J" $
|
, THU.testCase "doubleDec-J" $
|
||||||
pack ("999999999") @=? run 1 (doubleDec 999999999)
|
pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
|
||||||
, THU.testCase "doubleDec-K" $
|
, THU.testCase "doubleDec-K" $
|
||||||
pack ("-99999999") @=? run 1 (doubleDec (-99999999))
|
pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
|
||||||
, THU.testCase "shortTextJsonString-A" $
|
, THU.testCase "shortTextJsonString-A" $
|
||||||
pack ("\"hello\"") @=? run 1 (shortTextJsonString "hello")
|
pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
|
||||||
, THU.testCase "shortTextJsonString-B" $
|
, THU.testCase "shortTextJsonString-B" $
|
||||||
pack ("\"\\\\_\\\"_/\"") @=? run 1 (shortTextJsonString "\\_\"_/")
|
pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/")
|
||||||
, THU.testCase "shortTextJsonString-C" $
|
, THU.testCase "shortTextJsonString-C" $
|
||||||
pack ("\"Hi\\r\\nLo\"") @=? run 1 (shortTextJsonString "Hi\r\nLo")
|
pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo")
|
||||||
, THU.testCase "shortTextJsonString-D" $
|
, THU.testCase "shortTextJsonString-D" $
|
||||||
pack ("\"Hi\\u001BLo\"") @=? run 1 (shortTextJsonString "Hi\ESCLo")
|
pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo")
|
||||||
, THU.testCase "word-16-tree" $
|
, THU.testCase "word-16-tree" $
|
||||||
Word16Tree.expectedSmall @=? run 1
|
Word16Tree.expectedSmall @=? runConcat 1
|
||||||
(Word16Tree.encode Word16Tree.exampleSmall)
|
(Word16Tree.encode Word16Tree.exampleSmall)
|
||||||
|
, THU.testCase "byteArray-small" $
|
||||||
|
let a = replicateByte 3 0x50
|
||||||
|
b = replicateByte 5 0x51
|
||||||
|
in mconcat [a,b] @=? runConcat 1
|
||||||
|
( byteArray a <> byteArray b )
|
||||||
|
, THU.testCase "byteArray-big" $
|
||||||
|
let a = replicateByte 2105 0x50
|
||||||
|
b = replicateByte 725 0x51
|
||||||
|
c = replicateByte 900 0x52
|
||||||
|
d = replicateByte 800 0x53
|
||||||
|
e = replicateByte 700 0x54
|
||||||
|
f = replicateByte 950 0x55
|
||||||
|
g = replicateByte 975 0x56
|
||||||
|
h = replicateByte 3000 0x57
|
||||||
|
i = replicateByte 125 0x58
|
||||||
|
in mconcat [a,b,c,d,e,f,g,h,i] @=? runConcat 1
|
||||||
|
( byteArray a <> byteArray b <> byteArray c <>
|
||||||
|
byteArray d <> byteArray e <> byteArray f <>
|
||||||
|
byteArray g <> byteArray h <> byteArray i
|
||||||
|
)
|
||||||
]
|
]
|
||||||
, testGroup "alternate"
|
, testGroup "alternate"
|
||||||
[ TQC.testProperty "HexWord64" $ \x y ->
|
[ TQC.testProperty "HexWord64" $ \x y ->
|
||||||
run 1
|
runConcat 1
|
||||||
( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
|
( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
|
||||||
<> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
|
<> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
|
||||||
)
|
)
|
||||||
|
@ -118,28 +139,20 @@ tests = testGroup "Tests"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
replicateByte :: Int -> Word8 -> ByteArray
|
||||||
|
replicateByte n w = runST $ do
|
||||||
|
m <- PM.newByteArray n
|
||||||
|
PM.setByteArray m 0 n w
|
||||||
|
PM.unsafeFreezeByteArray m
|
||||||
|
|
||||||
pack :: String -> ByteArray
|
pack :: String -> ByteArray
|
||||||
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
||||||
|
|
||||||
packUtf8 :: String -> ByteArray
|
packUtf8 :: String -> ByteArray
|
||||||
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
|
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
|
||||||
|
|
||||||
-- This is used to test pasteArrayST
|
|
||||||
runArray ::
|
|
||||||
(a -> Builder) -- ^ Builder
|
|
||||||
-> V.Vector a -- ^ Elements to serialize
|
|
||||||
-> ByteArray -- ^ Number of elements serialized, serialization
|
|
||||||
runArray f !xs = runST $ do
|
|
||||||
let go !v0 !sz !chunks = if V.null v0
|
|
||||||
then pure (mconcat (L.reverse chunks))
|
|
||||||
else do
|
|
||||||
arr <- PM.newByteArray sz
|
|
||||||
(v1,MutableBytes _ off _) <- pasteArrayST (MutableBytes arr 0 sz) f v0
|
|
||||||
-- If nothing was serialized, we need a bigger buffer
|
|
||||||
let szNext = if V.length v0 == V.length v1 then sz + 1 else sz
|
|
||||||
c <- PM.unsafeFreezeByteArray =<< PM.resizeMutableByteArray arr off
|
|
||||||
go v1 szNext (c : chunks)
|
|
||||||
go xs 1 []
|
|
||||||
|
|
||||||
showWord64PaddedUpperHex :: Word64 -> String
|
showWord64PaddedUpperHex :: Word64 -> String
|
||||||
showWord64PaddedUpperHex = printf "%016X"
|
showWord64PaddedUpperHex = printf "%016X"
|
||||||
|
|
||||||
|
runConcat :: Int -> Builder -> ByteArray
|
||||||
|
runConcat n = Chunks.concat . run n
|
||||||
|
|
Loading…
Reference in a new issue