Switch builder implementation to use chunks (#10)
* switch builder implementation to use chunks * micro-optimize the conditional for builders of length one * use fromBoundedOne for ascii chars
This commit is contained in:
parent
3ad5261ff4
commit
8baf4cc369
6 changed files with 347 additions and 249 deletions
|
@ -35,6 +35,7 @@ expectedSmall :: ByteArray
|
|||
expectedSmall = Bytes.toByteArray $ Bytes.fromAsciiString
|
||||
"((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))"
|
||||
|
||||
|
||||
exampleSmall :: Word16Tree
|
||||
exampleSmall = Branch
|
||||
(Branch
|
||||
|
|
|
@ -36,6 +36,7 @@ flag checked
|
|||
|
||||
library
|
||||
exposed-modules:
|
||||
Data.Bytes.Chunks
|
||||
Data.ByteArray.Builder
|
||||
Data.ByteArray.Builder.Unsafe
|
||||
Data.ByteArray.Builder.Bounded
|
||||
|
@ -90,7 +91,7 @@ benchmark bench
|
|||
, small-bytearray-builder
|
||||
, text-short
|
||||
, byteslice
|
||||
ghc-options: -Wall -O2
|
||||
ghc-options: -Wall -O2 -ddump-to-file -ddump-simpl -dsuppress-all
|
||||
default-language: Haskell2010
|
||||
hs-source-dirs: bench, common
|
||||
main-is: Main.hs
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{-# language BangPatterns #-}
|
||||
{-# language DataKinds #-}
|
||||
{-# language DuplicateRecordFields #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language MagicHash #-}
|
||||
|
@ -9,19 +10,12 @@
|
|||
module Data.ByteArray.Builder
|
||||
( -- * Bounded Primitives
|
||||
Builder
|
||||
, construct
|
||||
, fromBounded
|
||||
-- * Evaluation
|
||||
, run
|
||||
, pasteST
|
||||
, pasteIO
|
||||
, pasteGrowST
|
||||
, pasteGrowIO
|
||||
, pasteArrayST
|
||||
, pasteArrayIO
|
||||
-- * Materialized Byte Sequences
|
||||
, bytes
|
||||
, bytearray
|
||||
, byteArray
|
||||
, shortTextUtf8
|
||||
, shortTextJsonString
|
||||
, cstring
|
||||
|
@ -77,9 +71,10 @@ module Data.ByteArray.Builder
|
|||
) where
|
||||
|
||||
import Control.Monad.Primitive (primitive_)
|
||||
import Control.Monad.ST (ST,stToIO)
|
||||
import Control.Monad.ST (ST,stToIO,runST)
|
||||
import Control.Monad.ST.Run (runByteArrayST)
|
||||
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
||||
import Data.ByteArray.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
||||
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
||||
|
@ -90,8 +85,9 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
|||
import Data.Text.Short (ShortText)
|
||||
import Data.Word (Word64,Word32,Word16,Word8)
|
||||
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,RealWorld,(>=#),(/=#))
|
||||
import GHC.Exts ((+#),(-#))
|
||||
import GHC.Exts (MutableByteArray#,(+#),(-#),(<#))
|
||||
import GHC.ST (ST(ST))
|
||||
import Data.Bytes.Chunks (Chunks(..))
|
||||
|
||||
import qualified Arithmetic.Nat as Nat
|
||||
import qualified Arithmetic.Types as Arithmetic
|
||||
|
@ -102,105 +98,31 @@ import qualified Data.Vector as V
|
|||
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
||||
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||
|
||||
-- | Run a builder. An accurate size hint is important for
|
||||
-- good performance. The size hint should be slightly greater
|
||||
-- than or equal to the actual size.
|
||||
-- | Run a builder.
|
||||
run ::
|
||||
Int -- ^ Hint for upper bound on size
|
||||
Int -- ^ Size of initial chunk (use 4080 if uncertain)
|
||||
-> Builder -- ^ Builder
|
||||
-> ByteArray
|
||||
run hint b = runByteArrayST $ do
|
||||
let go !n = do
|
||||
arr <- PM.newByteArray n
|
||||
pasteST b (MutableBytes arr 0 n) >>= \case
|
||||
Nothing -> go (n + n + 16)
|
||||
Just len -> do
|
||||
shrinkMutableByteArray arr len
|
||||
PM.unsafeFreezeByteArray arr
|
||||
go (max hint 16)
|
||||
-> Chunks
|
||||
run hint@(I# hint# ) (Builder f) = runST $ do
|
||||
MutableByteArray buf0 <- PM.newByteArray hint
|
||||
cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of
|
||||
(# s1, bufX, offX, _, csX #) ->
|
||||
(# s1, Mutable bufX offX csX #)
|
||||
commitsOntoChunks ChunksNil cs
|
||||
|
||||
-- | Variant of 'pasteArrayST' that runs in 'IO'.
|
||||
pasteArrayIO ::
|
||||
MutableBytes RealWorld -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> IO (V.Vector a, MutableBytes RealWorld) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayIO !arr f !xs = stToIO (pasteArrayST arr f xs)
|
||||
|
||||
-- | Fold over a vector, applying the builder to each element until
|
||||
-- the buffer cannot accomodate any more.
|
||||
pasteArrayST ::
|
||||
MutableBytes s -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> ST s (V.Vector a, MutableBytes s) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayST (MutableBytes arr off0 len0) f !xs0 = do
|
||||
let go !xs !ixBufA !lenBufA = if V.length xs > 0
|
||||
then do
|
||||
let a = V.unsafeHead xs
|
||||
pasteST (f a) (MutableBytes arr ixBufA lenBufA) >>= \case
|
||||
Nothing -> pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
Just ixBufB ->
|
||||
go (V.unsafeTail xs) ixBufB (lenBufA + (ixBufA - ixBufB))
|
||||
else pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
go xs0 off0 len0
|
||||
|
||||
-- | Paste the builder into the byte array starting at offset zero.
|
||||
-- This repeatedly reallocates the byte array if it cannot accomodate
|
||||
-- the builder, replaying the builder each time.
|
||||
pasteGrowST ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArrayOffset s
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> ST s (MutableByteArrayOffset s)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowST !n b !(MutableByteArrayOffset arr0 off0) = do
|
||||
let go !arr !sz = pasteST b (MutableBytes arr off0 (sz - off0)) >>= \case
|
||||
Nothing -> do
|
||||
let szNext = sz + n
|
||||
arrNext <- PM.resizeMutableByteArray arr szNext
|
||||
go arrNext szNext
|
||||
Just ix -> pure (MutableByteArrayOffset{array=arr,offset=ix})
|
||||
go arr0 =<< PM.getSizeofMutableByteArray arr0
|
||||
|
||||
-- | Variant of 'pasteGrowST' that runs in 'IO'.
|
||||
pasteGrowIO ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArrayOffset RealWorld
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> IO (MutableByteArrayOffset RealWorld)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowIO !n b !arr = stToIO (pasteGrowST n b arr)
|
||||
|
||||
-- | Execute the builder, pasting its contents into a buffer.
|
||||
-- If the buffer is not large enough, this returns 'Nothing'.
|
||||
-- Otherwise, it returns the index in the buffer that follows
|
||||
-- the payload just written.
|
||||
pasteST :: Builder -> MutableBytes s -> ST s (Maybe Int)
|
||||
{-# inline pasteST #-}
|
||||
pasteST (Builder f) (MutableBytes (MutableByteArray arr) (I# off) (I# len)) =
|
||||
ST $ \s0 -> case f arr off len s0 of
|
||||
(# s1, r #) -> if Exts.isTrue# (r /=# (-1#))
|
||||
then (# s1, Just (I# r) #)
|
||||
else (# s1, Nothing #)
|
||||
|
||||
-- | Variant of 'pasteST' that runs in 'IO'.
|
||||
pasteIO :: Builder -> MutableBytes RealWorld -> IO (Maybe Int)
|
||||
{-# inline pasteIO #-}
|
||||
pasteIO b m = stToIO (pasteST b m)
|
||||
|
||||
-- | Constructor for 'Builder' that works on a function with lifted
|
||||
-- arguments instead of unlifted ones. This is just as unsafe as the
|
||||
-- actual constructor.
|
||||
construct :: (forall s. MutableBytes s -> ST s (Maybe Int)) -> Builder
|
||||
construct f = Builder
|
||||
$ \arr off len s0 ->
|
||||
case unST (f (MutableBytes (MutableByteArray arr) (I# off) (I# len))) s0 of
|
||||
(# s1, m #) -> case m of
|
||||
Nothing -> (# s1, (-1#) #)
|
||||
Just (I# n) -> (# s1, n #)
|
||||
-- Internal. This freezes all the mutable byte arrays in-place,
|
||||
-- so be careful. It also reverses the chunks since everything
|
||||
-- is backwards.
|
||||
commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
|
||||
commitsOntoChunks !xs Initial = pure xs
|
||||
commitsOntoChunks !xs (Immutable arr off len cs) =
|
||||
commitsOntoChunks (ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs) cs
|
||||
commitsOntoChunks !xs (Mutable buf len cs) = case len of
|
||||
0# -> commitsOntoChunks xs cs
|
||||
_ -> do
|
||||
shrinkMutableByteArray (MutableByteArray buf) (I# len)
|
||||
arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
|
||||
commitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs
|
||||
|
||||
-- | Convert a bounded builder to an unbounded one. If the size
|
||||
-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
|
||||
|
@ -210,23 +132,51 @@ fromBounded ::
|
|||
-> Bounded.Builder n
|
||||
-> Builder
|
||||
{-# inline fromBounded #-}
|
||||
fromBounded n (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
|
||||
let !(I# req) = Nat.demote n in
|
||||
case len >=# req of
|
||||
1# -> f arr off s0
|
||||
_ -> (# s0, (-1#) #)
|
||||
fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||
let !(I# req) = Nat.demote n
|
||||
!(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||
_ -> let !(I# lenX) = max 4080 (I# req) in
|
||||
case Exts.newByteArray# lenX s0 of
|
||||
(# sX, bufX #) ->
|
||||
(# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||
in case f buf1 off1 s1 of
|
||||
(# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||
|
||||
-- This is a micro-optimization that uses an equality check instead
|
||||
-- of an inequality check when the required number of bytes is one.
|
||||
-- Use this instead of fromBounded (where possible) leads to marginally
|
||||
-- better results in benchmarks.
|
||||
fromBoundedOne ::
|
||||
Bounded.Builder 1
|
||||
-> Builder
|
||||
{-# inline fromBoundedOne #-}
|
||||
fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||
let !(# s1, buf1, off1, len1, cs1 #) = case len0 of
|
||||
0# -> case Exts.newByteArray# 4080# s0 of
|
||||
(# sX, bufX #) ->
|
||||
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||
_ -> (# s0, buf0, off0, len0, cs0 #)
|
||||
in case f buf1 off1 s1 of
|
||||
(# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||
|
||||
-- | Create a builder from an unsliced byte sequence.
|
||||
bytearray :: ByteArray -> Builder
|
||||
bytearray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
||||
byteArray :: ByteArray -> Builder
|
||||
byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
||||
|
||||
-- | Create a builder from a sliced byte sequence.
|
||||
bytes :: Bytes -> Builder
|
||||
bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len >= slen
|
||||
then do
|
||||
PM.copyByteArray arr off src soff slen
|
||||
pure (Just (off + slen))
|
||||
else pure Nothing
|
||||
bytes (Bytes (ByteArray src# ) (I# soff# ) slen@(I# slen# )) = Builder
|
||||
(\buf0 off0 len0 cs0 s0 -> if slen >= 1024
|
||||
then case Exts.newByteArray# 0# s0 of
|
||||
(# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #)
|
||||
else case len0 <# slen# of
|
||||
1# -> case Exts.newByteArray# 4080# s0 of
|
||||
(# s1, buf1 #) -> case Exts.copyByteArray# src# soff# buf1 0# slen# s1 of
|
||||
s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #)
|
||||
_ -> let !s1 = Exts.copyByteArray# src# soff# buf0 off0 slen# s0 in
|
||||
(# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #)
|
||||
)
|
||||
|
||||
-- | Create a builder from a slice of an array of 'Word8'. There is the same
|
||||
-- as 'bytes' but is provided as a convenience for users working with different
|
||||
|
@ -238,10 +188,7 @@ word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len)
|
|||
-- byte sequence is UTF-8 encoded text.
|
||||
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
||||
{-# inline slicedUtf8TextJson #-}
|
||||
slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0 dlen0) ->
|
||||
let slen0 = I# slen0#
|
||||
in if dlen0 > (2 * slen0) + 2
|
||||
then do
|
||||
slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction reqLen $ \dst doff0 -> do
|
||||
PM.writeByteArray dst doff0 (c2w '"')
|
||||
let go !soff !slen !doff = if slen > 0
|
||||
then case indexChar8Array (ByteArray src#) soff of
|
||||
|
@ -261,8 +208,24 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
|
|||
else pure doff
|
||||
doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
|
||||
PM.writeByteArray dst doffRes (c2w '"')
|
||||
pure (Just (doffRes + 1))
|
||||
else pure Nothing
|
||||
pure (doffRes + 1)
|
||||
where
|
||||
slen0 = I# slen0#
|
||||
reqLen = (2 * slen0) + 2
|
||||
|
||||
-- | Constructor for 'Builder' that works on a function with lifted
|
||||
-- arguments instead of unlifted ones. This is just as unsafe as the
|
||||
-- actual constructor.
|
||||
fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder
|
||||
fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||
_ -> let !(I# lenX) = max 4080 (I# req) in
|
||||
case Exts.newByteArray# lenX s0 of
|
||||
(# sX, bufX #) ->
|
||||
(# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||
in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
|
||||
(# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||
|
||||
-- Internal. Write two characters in the ASCII plane to a byte array.
|
||||
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
|
||||
|
@ -420,7 +383,7 @@ word8PaddedUpperHex w =
|
|||
-- | Encode an ASCII char.
|
||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||
ascii :: Char -> Builder
|
||||
ascii c = fromBounded Nat.constant (Bounded.ascii c)
|
||||
ascii c = fromBoundedOne (Bounded.ascii c)
|
||||
|
||||
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||
char :: Char -> Builder
|
||||
|
@ -475,7 +438,7 @@ word16BE w = fromBounded Nat.constant (Bounded.word16BE w)
|
|||
|
||||
-- | Requires exactly 1 byte.
|
||||
word8 :: Word8 -> Builder
|
||||
word8 w = fromBounded Nat.constant (Bounded.word8 w)
|
||||
word8 w = fromBoundedOne (Bounded.word8 w)
|
||||
|
||||
-- | Prefix a builder with its size in bytes. This size is
|
||||
-- presented as a big-endian 32-bit word. The need to prefix
|
||||
|
@ -490,35 +453,54 @@ word8 w = fromBounded Nat.constant (Bounded.word8 w)
|
|||
-- However, using 'consLength32BE' is much more efficient here
|
||||
-- since it only materializes the 'ByteArray' once.
|
||||
consLength32BE :: Builder -> Builder
|
||||
consLength32BE (Builder f) = Builder $ \arr off len s0 -> case len >=# 4# of
|
||||
1# -> case f arr (off +# 4# ) (len -# 4# ) s0 of
|
||||
(# s1, r #) -> case r of
|
||||
(-1#) -> (# s1, (-1#) #)
|
||||
_ ->
|
||||
let ST g = UnsafeBounded.pasteST
|
||||
(Bounded.word32BE (fromIntegral ((I# r - I# off) - 4)))
|
||||
(MutableByteArray arr)
|
||||
(I# off)
|
||||
in case g s1 of
|
||||
(# s2, _ #) -> (# s2, r #)
|
||||
_ -> (# s0, (-1#) #)
|
||||
consLength32BE (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# 4# of
|
||||
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||
_ -> case Exts.newByteArray# 4080# s0 of
|
||||
(# sX, bufX #) ->
|
||||
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||
in case f buf1 (off1 +# 4# ) (len1 -# 4# ) cs1 s1 of
|
||||
(# s2, buf2, off2, len2, cs2 #) ->
|
||||
let !dist = case Exts.sameMutableByteArray# buf1 buf2 of
|
||||
1# -> off2 -# off1
|
||||
_ -> commitDistance buf1 off2 cs2 -# off1
|
||||
ST g = UnsafeBounded.pasteST
|
||||
(Bounded.word32BE (fromIntegral (I# (dist -# 4# ))))
|
||||
(MutableByteArray buf1)
|
||||
(I# off1)
|
||||
in case g s2 of
|
||||
(# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #)
|
||||
|
||||
|
||||
-- | Prefix a builder with its size in bytes. This size is
|
||||
-- presented as a big-endian 64-bit word. See 'consLength32BE'.
|
||||
consLength64BE :: Builder -> Builder
|
||||
consLength64BE (Builder f) = Builder $ \arr off len s0 -> case len >=# 8# of
|
||||
1# -> case f arr (off +# 8# ) (len -# 8# ) s0 of
|
||||
(# s1, r #) -> case r of
|
||||
(-1#) -> (# s1, (-1#) #)
|
||||
_ ->
|
||||
let ST g = UnsafeBounded.pasteST
|
||||
(Bounded.word64BE (fromIntegral ((I# r - I# off) - 8)))
|
||||
(MutableByteArray arr)
|
||||
(I# off)
|
||||
in case g s1 of
|
||||
(# s2, _ #) -> (# s2, r #)
|
||||
_ -> (# s0, (-1#) #)
|
||||
consLength64BE (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# 8# of
|
||||
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||
_ -> case Exts.newByteArray# 4080# s0 of
|
||||
(# sX, bufX #) ->
|
||||
(# sX, bufX, 0#, 4080#, Mutable buf0 off0 cs0 #)
|
||||
in case f buf1 (off1 +# 8# ) (len1 -# 8# ) cs1 s1 of
|
||||
(# s2, buf2, off2, len2, cs2 #) ->
|
||||
let !dist = case Exts.sameMutableByteArray# buf1 buf2 of
|
||||
1# -> off2 -# off1
|
||||
_ -> commitDistance buf1 off2 cs2 -# off1
|
||||
ST g = UnsafeBounded.pasteST
|
||||
(Bounded.word64BE (fromIntegral (I# (dist -# 8# ))))
|
||||
(MutableByteArray buf1)
|
||||
(I# off1)
|
||||
in case g s2 of
|
||||
(# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #)
|
||||
|
||||
commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int#
|
||||
commitDistance _ !_ Initial = error "chunkDistance: chunk not found"
|
||||
commitDistance target !n (Immutable _ _ len cs) =
|
||||
commitDistance target (n +# len) cs
|
||||
commitDistance target !n (Mutable buf len cs) =
|
||||
case Exts.sameMutableByteArray# target buf of
|
||||
1# -> n +# len
|
||||
_ -> commitDistance target (n +# len) cs
|
||||
|
||||
-- ShortText is already UTF-8 encoded. This is a no-op.
|
||||
shortTextToByteArray :: ShortText -> ByteArray
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
module Data.ByteArray.Builder.Unsafe
|
||||
( -- * Types
|
||||
Builder(..)
|
||||
, Commits(..)
|
||||
-- * Safe Functions
|
||||
-- | These functions are actually completely safe, but they are defined
|
||||
-- here because they are used by typeclass instances. Import them from
|
||||
|
@ -17,10 +18,10 @@ module Data.ByteArray.Builder.Unsafe
|
|||
, cstring
|
||||
) where
|
||||
|
||||
import Data.Primitive (MutableByteArray(MutableByteArray))
|
||||
import Data.Primitive (MutableByteArray(MutableByteArray),ByteArray)
|
||||
import Foreign.C.String (CString)
|
||||
import GHC.Exts ((-#),(+#),(/=#),(>#))
|
||||
import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
|
||||
import GHC.Exts ((-#),(+#),(>#))
|
||||
import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr))
|
||||
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
|
||||
import GHC.ST (ST(ST))
|
||||
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
||||
|
@ -32,10 +33,14 @@ import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
|||
-- | An unmaterialized sequence of bytes that may be pasted
|
||||
-- into a mutable byte array.
|
||||
newtype Builder
|
||||
= Builder (forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
|
||||
-- ^ This function takes a buffer, an offset, and a number of remaining bytes.
|
||||
-- It returns the new offset (should be greater than the old offset), or if
|
||||
-- there was not enough space left in buffer, it returns -1.
|
||||
= Builder (forall s.
|
||||
MutableByteArray# s -> -- buffer we are currently writing to
|
||||
Int# -> -- offset into the current buffer
|
||||
Int# -> -- number of bytes remaining in the current buffer
|
||||
Commits s -> -- buffers and immutable byte slices that we have already committed
|
||||
State# s ->
|
||||
(# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things
|
||||
)
|
||||
|
||||
instance IsString Builder where
|
||||
{-# inline fromString #-}
|
||||
|
@ -43,53 +48,73 @@ instance IsString Builder where
|
|||
|
||||
instance Semigroup Builder where
|
||||
{-# inline (<>) #-}
|
||||
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
|
||||
(# s1, r #) -> case r /=# (-1#) of
|
||||
1# -> g arr r (len0 +# (off0 -# r)) s1
|
||||
_ -> (# s1, (-1#) #)
|
||||
Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of
|
||||
(# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1
|
||||
|
||||
instance Monoid Builder where
|
||||
{-# inline mempty #-}
|
||||
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
||||
mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #)
|
||||
|
||||
data Commits s
|
||||
= Mutable
|
||||
(MutableByteArray# s)
|
||||
-- ^ Mutable buffer, start index implicitly zero
|
||||
Int# -- ^ Length (may be smaller than actual length)
|
||||
!(Commits s)
|
||||
| Immutable
|
||||
ByteArray# -- ^ Immutable chunk
|
||||
Int# -- ^ Offset into chunk, not necessarily zero
|
||||
Int# -- ^ Length (may be smaller than actual length)
|
||||
!(Commits s)
|
||||
| Initial
|
||||
|
||||
-- | Create a builder from a cons-list of 'Char'. These
|
||||
-- are be UTF-8 encoded.
|
||||
stringUtf8 :: String -> Builder
|
||||
{-# inline stringUtf8 #-}
|
||||
stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
|
||||
stringUtf8 cs = Builder (goString cs)
|
||||
|
||||
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
|
||||
-- textual encoding, copying bytes until @NUL@ is reached.
|
||||
cstring :: CString -> Builder
|
||||
{-# inline cstring #-}
|
||||
cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
|
||||
cstring (Ptr cs) = Builder (goCString cs)
|
||||
|
||||
goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
||||
goString :: String
|
||||
-> MutableByteArray# s -> Int# -> Int# -> Commits s
|
||||
-> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
|
||||
{-# noinline goString #-}
|
||||
goString [] _ off0 _ s0 = (# s0, off0 #)
|
||||
goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
|
||||
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
|
||||
(# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
|
||||
_ -> (# s0, (-1#) #)
|
||||
goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #)
|
||||
goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of
|
||||
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of
|
||||
(# s1, I# off1 #) -> goString cs buf0 off1 (len0 -# (off1 -# off0)) cs0 s1
|
||||
_ -> case Exts.newByteArray# 4080# s0 of
|
||||
(# s1, buf1 #) -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf1) 0) s1 of
|
||||
(# s2, I# off1 #) -> goString cs buf1 off1 (4080# -# off1) (Mutable buf0 off0 cs0) s2
|
||||
|
||||
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
|
||||
-- since GHC uses a different function based on whether or not non-ASCII
|
||||
-- codepoints are used in the string.
|
||||
{-# RULES
|
||||
"Builder stringUtf8/cstring" forall s a b c d.
|
||||
goString (unpackCString# s) a b c d = goCString s a b c d
|
||||
"Builder stringUtf8/cstring-utf8" forall s a b c d.
|
||||
goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
|
||||
"Builder stringUtf8/cstring" forall s a b c d e.
|
||||
goString (unpackCString# s) a b c d e = goCString s a b c d e
|
||||
"Builder stringUtf8/cstring-utf8" forall s a b c d e.
|
||||
goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e
|
||||
#-}
|
||||
|
||||
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
||||
goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
|
||||
0## -> (# s0, off0 #)
|
||||
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s
|
||||
-> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
|
||||
goCString addr buf0 off0 len0 cs0 s0 = case Exts.indexWord8OffAddr# addr 0# of
|
||||
0## -> (# s0, buf0, off0, len0, cs0 #)
|
||||
w -> case len0 of
|
||||
0# -> (# s0, (-1#) #)
|
||||
_ -> case Exts.writeWord8Array# buf off0 w s0 of
|
||||
s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
|
||||
0# -> case Exts.newByteArray# 4080# s0 of
|
||||
(# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# w s1 of
|
||||
s2 -> goCString
|
||||
(Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# )
|
||||
(Mutable buf0 off0 cs0)
|
||||
s2
|
||||
_ -> case Exts.writeWord8Array# buf0 off0 w s0 of
|
||||
s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1
|
||||
|
||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||
unST (ST f) = f
|
||||
|
||||
|
|
76
src/Data/Bytes/Chunks.hs
Normal file
76
src/Data/Bytes/Chunks.hs
Normal file
|
@ -0,0 +1,76 @@
|
|||
{-# language BangPatterns #-}
|
||||
{-# language DerivingStrategies #-}
|
||||
{-# language TypeFamilies #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
{-# language NamedFieldPuns #-}
|
||||
|
||||
module Data.Bytes.Chunks
|
||||
( Chunks(..)
|
||||
, concat
|
||||
) where
|
||||
|
||||
import Prelude hiding (length,concat)
|
||||
|
||||
import GHC.ST (ST(..))
|
||||
import Data.Bytes.Types (Bytes(..))
|
||||
import Data.Primitive (ByteArray(..),MutableByteArray(..))
|
||||
import GHC.Exts (ByteArray#,MutableByteArray#)
|
||||
import GHC.Exts (IsList,Int#,State#,Int(I#),(+#),(-#))
|
||||
import Control.Monad.ST.Run (runByteArrayST)
|
||||
|
||||
import qualified GHC.Exts as Exts
|
||||
import qualified Data.Primitive as PM
|
||||
|
||||
data Chunks
|
||||
= ChunksCons {-# UNPACK #-} !Bytes !Chunks
|
||||
| ChunksNil
|
||||
|
||||
concat :: Chunks -> ByteArray
|
||||
concat x = ByteArray (concat# x)
|
||||
|
||||
concat# :: Chunks -> ByteArray#
|
||||
{-# noinline concat# #-}
|
||||
concat# ChunksNil = case mempty of {ByteArray x -> x}
|
||||
concat# (ChunksCons (Bytes{array=c,offset=coff,length=szc}) cs) = case cs of
|
||||
ChunksNil -> case c of {ByteArray x -> x}
|
||||
ChunksCons (Bytes{array=d,offset=doff,length=szd}) ds ->
|
||||
unBa $ runByteArrayST $ do
|
||||
let szboth = szc + szd
|
||||
len = chunksLengthGo szboth ds
|
||||
dst <- PM.newByteArray len
|
||||
PM.copyByteArray dst 0 c coff szc
|
||||
PM.copyByteArray dst szc d doff szd
|
||||
_ <- copy dst szboth ds
|
||||
PM.unsafeFreezeByteArray dst
|
||||
|
||||
chunksLengthGo :: Int -> Chunks -> Int
|
||||
chunksLengthGo !n ChunksNil = n
|
||||
chunksLengthGo !n (ChunksCons (Bytes{length}) cs) =
|
||||
chunksLengthGo (n + length) cs
|
||||
|
||||
-- | Copy the contents of the chunks into a mutable array.
|
||||
-- Precondition: The destination must have enough space to
|
||||
-- house the contents. This is not checked.
|
||||
copy ::
|
||||
MutableByteArray s -- ^ Destination
|
||||
-> Int -- ^ Destination offset
|
||||
-> Chunks -- ^ Source
|
||||
-> ST s Int -- ^ Returns the next index into the destination after the payload
|
||||
{-# inline copy #-}
|
||||
copy (MutableByteArray dst) (I# off) cs = ST
|
||||
(\s0 -> case copy# dst off cs s0 of
|
||||
(# s1, nextOff #) -> (# s1, I# nextOff #)
|
||||
)
|
||||
|
||||
copy# :: MutableByteArray# s -> Int# -> Chunks -> State# s -> (# State# s, Int# #)
|
||||
copy# _ off ChunksNil s0 = (# s0, off #)
|
||||
copy# marr off (ChunksCons (Bytes{array,offset,length}) cs) s0 =
|
||||
case Exts.copyByteArray# (unBa array) (unI offset) marr off (unI length) s0 of
|
||||
s1 -> copy# marr (off +# unI length) cs s1
|
||||
|
||||
unI :: Int -> Int#
|
||||
unI (I# i) = i
|
||||
|
||||
unBa :: ByteArray -> ByteArray#
|
||||
unBa (ByteArray x) = x
|
109
test/Main.hs
109
test/Main.hs
|
@ -18,6 +18,7 @@ import qualified Arithmetic.Nat as Nat
|
|||
import qualified Data.ByteString as ByteString
|
||||
import qualified Data.ByteString.Builder as BB
|
||||
import qualified Data.ByteString.Lazy.Char8 as LB
|
||||
import qualified Data.Bytes.Chunks as Chunks
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Primitive as PM
|
||||
import qualified Data.Text as T
|
||||
|
@ -37,79 +38,99 @@ tests :: TestTree
|
|||
tests = testGroup "Tests"
|
||||
[ testGroup "live"
|
||||
[ TQC.testProperty "word64Dec" $ \w ->
|
||||
run 1 (word64Dec w) === pack (show w)
|
||||
runConcat 1 (word64Dec w) === pack (show w)
|
||||
, TQC.testProperty "word64Dec-x3" $ \x y z ->
|
||||
run 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
||||
runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
||||
===
|
||||
pack (show x ++ show y ++ show z)
|
||||
, TQC.testProperty "int64Dec-x3" $ \x y z ->
|
||||
run 1 (int64Dec x <> int64Dec y <> int64Dec z)
|
||||
runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z)
|
||||
===
|
||||
pack (show x ++ show y ++ show z)
|
||||
, TQC.testProperty "word64BE-x3" $ \x y z ->
|
||||
run 1 (word64BE x <> word64BE y <> word64BE z)
|
||||
runConcat 1 (word64BE x <> word64BE y <> word64BE z)
|
||||
===
|
||||
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
|
||||
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
||||
run 1 (word64PaddedUpperHex w)
|
||||
runConcat 1 (word64PaddedUpperHex w)
|
||||
===
|
||||
pack (showWord64PaddedUpperHex w)
|
||||
, TQC.testProperty "word8Dec" $ \w ->
|
||||
run 1 (word8Dec w)
|
||||
runConcat 1 (word8Dec w)
|
||||
===
|
||||
pack (show w)
|
||||
, TQC.testProperty "consLength32BE" $ \w ->
|
||||
run 1 (consLength32BE (word8Dec w))
|
||||
runConcat 1 (consLength32BE (word8Dec w))
|
||||
===
|
||||
pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
||||
, TQC.testProperty "consLength64BE" $ \w ->
|
||||
run 1 (consLength64BE (word16Dec w))
|
||||
pack
|
||||
( '\x00' : '\x00' : '\x00' : '\x00'
|
||||
: '\x00' : '\x00' : '\x00' : chr (L.length (show w))
|
||||
: show w
|
||||
)
|
||||
===
|
||||
pack ('\x00' : '\x00' : '\x00' : '\x00' : '\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
||||
, TQC.testProperty "pasteArrayST" $ \(xs :: [Word64]) ->
|
||||
(runArray word64Dec (V.fromList xs))
|
||||
===
|
||||
pack (foldMap show xs)
|
||||
runConcat 1 (consLength64BE (word16Dec w))
|
||||
, THU.testCase "stringUtf8" $
|
||||
packUtf8 "¿Cómo estás? I am doing well." @=?
|
||||
run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
||||
runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
||||
, THU.testCase "doubleDec-A" $
|
||||
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
|
||||
pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0)
|
||||
, THU.testCase "doubleDec-B" $
|
||||
pack (show (2.5 :: Double)) @=? run 1 (doubleDec 2.5)
|
||||
pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5)
|
||||
, THU.testCase "doubleDec-C" $
|
||||
pack ("1e+15") @=? run 1 (doubleDec 1e15)
|
||||
pack ("1e+15") @=? runConcat 1 (doubleDec 1e15)
|
||||
, THU.testCase "doubleDec-D" $
|
||||
pack ("-42") @=? run 1 (doubleDec (-42))
|
||||
pack ("-42") @=? runConcat 1 (doubleDec (-42))
|
||||
, THU.testCase "doubleDec-E" $
|
||||
pack ("-8.88888888888888e+14") @=? run 1 (doubleDec (-888888888888888.8888888))
|
||||
pack ("-8.88888888888888e+14") @=? runConcat 1 (doubleDec (-888888888888888.8888888))
|
||||
, THU.testCase "doubleDec-F" $
|
||||
pack ("42") @=? run 1 (doubleDec 42)
|
||||
pack ("42") @=? runConcat 1 (doubleDec 42)
|
||||
, THU.testCase "doubleDec-G" $
|
||||
pack ("0") @=? run 1 (doubleDec 0)
|
||||
pack ("0") @=? runConcat 1 (doubleDec 0)
|
||||
, THU.testCase "doubleDec-H" $
|
||||
pack ("0.5") @=? run 1 (doubleDec 0.5)
|
||||
pack ("0.5") @=? runConcat 1 (doubleDec 0.5)
|
||||
, THU.testCase "doubleDec-I" $
|
||||
pack ("-0.5") @=? run 1 (doubleDec (-0.5))
|
||||
pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5))
|
||||
, THU.testCase "doubleDec-J" $
|
||||
pack ("999999999") @=? run 1 (doubleDec 999999999)
|
||||
pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
|
||||
, THU.testCase "doubleDec-K" $
|
||||
pack ("-99999999") @=? run 1 (doubleDec (-99999999))
|
||||
pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
|
||||
, THU.testCase "shortTextJsonString-A" $
|
||||
pack ("\"hello\"") @=? run 1 (shortTextJsonString "hello")
|
||||
pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
|
||||
, THU.testCase "shortTextJsonString-B" $
|
||||
pack ("\"\\\\_\\\"_/\"") @=? run 1 (shortTextJsonString "\\_\"_/")
|
||||
pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/")
|
||||
, THU.testCase "shortTextJsonString-C" $
|
||||
pack ("\"Hi\\r\\nLo\"") @=? run 1 (shortTextJsonString "Hi\r\nLo")
|
||||
pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo")
|
||||
, THU.testCase "shortTextJsonString-D" $
|
||||
pack ("\"Hi\\u001BLo\"") @=? run 1 (shortTextJsonString "Hi\ESCLo")
|
||||
pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo")
|
||||
, THU.testCase "word-16-tree" $
|
||||
Word16Tree.expectedSmall @=? run 1
|
||||
Word16Tree.expectedSmall @=? runConcat 1
|
||||
(Word16Tree.encode Word16Tree.exampleSmall)
|
||||
, THU.testCase "byteArray-small" $
|
||||
let a = replicateByte 3 0x50
|
||||
b = replicateByte 5 0x51
|
||||
in mconcat [a,b] @=? runConcat 1
|
||||
( byteArray a <> byteArray b )
|
||||
, THU.testCase "byteArray-big" $
|
||||
let a = replicateByte 2105 0x50
|
||||
b = replicateByte 725 0x51
|
||||
c = replicateByte 900 0x52
|
||||
d = replicateByte 800 0x53
|
||||
e = replicateByte 700 0x54
|
||||
f = replicateByte 950 0x55
|
||||
g = replicateByte 975 0x56
|
||||
h = replicateByte 3000 0x57
|
||||
i = replicateByte 125 0x58
|
||||
in mconcat [a,b,c,d,e,f,g,h,i] @=? runConcat 1
|
||||
( byteArray a <> byteArray b <> byteArray c <>
|
||||
byteArray d <> byteArray e <> byteArray f <>
|
||||
byteArray g <> byteArray h <> byteArray i
|
||||
)
|
||||
]
|
||||
, testGroup "alternate"
|
||||
[ TQC.testProperty "HexWord64" $ \x y ->
|
||||
run 1
|
||||
runConcat 1
|
||||
( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
|
||||
<> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
|
||||
)
|
||||
|
@ -118,28 +139,20 @@ tests = testGroup "Tests"
|
|||
]
|
||||
]
|
||||
|
||||
replicateByte :: Int -> Word8 -> ByteArray
|
||||
replicateByte n w = runST $ do
|
||||
m <- PM.newByteArray n
|
||||
PM.setByteArray m 0 n w
|
||||
PM.unsafeFreezeByteArray m
|
||||
|
||||
pack :: String -> ByteArray
|
||||
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
||||
|
||||
packUtf8 :: String -> ByteArray
|
||||
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
|
||||
|
||||
-- This is used to test pasteArrayST
|
||||
runArray ::
|
||||
(a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> ByteArray -- ^ Number of elements serialized, serialization
|
||||
runArray f !xs = runST $ do
|
||||
let go !v0 !sz !chunks = if V.null v0
|
||||
then pure (mconcat (L.reverse chunks))
|
||||
else do
|
||||
arr <- PM.newByteArray sz
|
||||
(v1,MutableBytes _ off _) <- pasteArrayST (MutableBytes arr 0 sz) f v0
|
||||
-- If nothing was serialized, we need a bigger buffer
|
||||
let szNext = if V.length v0 == V.length v1 then sz + 1 else sz
|
||||
c <- PM.unsafeFreezeByteArray =<< PM.resizeMutableByteArray arr off
|
||||
go v1 szNext (c : chunks)
|
||||
go xs 1 []
|
||||
|
||||
showWord64PaddedUpperHex :: Word64 -> String
|
||||
showWord64PaddedUpperHex = printf "%016X"
|
||||
|
||||
runConcat :: Int -> Builder -> ByteArray
|
||||
runConcat n = Chunks.concat . run n
|
||||
|
|
Loading…
Reference in a new issue