Add flush, copy, and insert
This commit is contained in:
parent
8baf4cc369
commit
c84f6cbcca
4 changed files with 100 additions and 21 deletions
|
@ -1,11 +1,13 @@
|
||||||
# Revision history for small-bytearray-builder
|
# Revision history for small-bytearray-builder
|
||||||
|
|
||||||
## 0.2.2.0 -- 2019-??-??
|
## 0.3.0.0 -- 2019-??-??
|
||||||
|
|
||||||
* Introduce `consLensBE32` for efficient serialization of wire protocols
|
* Introduce `consLensBE32` for efficient serialization of wire protocols
|
||||||
that require prefixing a payload with its length.
|
that require prefixing a payload with its length.
|
||||||
* Add `int64BE` as a convenience.
|
* Add `int64BE` as a convenience.
|
||||||
* Add little-endian encoding functions for `Word16`, `Word32`, and `Word64`.
|
* Add little-endian encoding functions for `Word16`, `Word32`, and `Word64`.
|
||||||
|
* Add `flush`, `copy`, and `insert` for better control when
|
||||||
|
converting byte sequences to builders.
|
||||||
|
|
||||||
## 0.2.1.0 -- 2019-09-05
|
## 0.2.1.0 -- 2019-09-05
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,6 @@ library
|
||||||
, byteslice >=0.1 && <0.2
|
, byteslice >=0.1 && <0.2
|
||||||
, primitive-offset >=0.2 && <0.3
|
, primitive-offset >=0.2 && <0.3
|
||||||
, run-st >=0.1 && <0.2
|
, run-st >=0.1 && <0.2
|
||||||
, vector >=0.12.0.3 && <0.13
|
|
||||||
, bytestring >=0.10.8.2 && <0.11
|
, bytestring >=0.10.8.2 && <0.11
|
||||||
, text-short >=0.1.3 && <0.2
|
, text-short >=0.1.3 && <0.2
|
||||||
, natural-arithmetic >=0.1 && <0.2
|
, natural-arithmetic >=0.1 && <0.2
|
||||||
|
|
|
@ -15,6 +15,8 @@ module Data.ByteArray.Builder
|
||||||
, run
|
, run
|
||||||
-- * Materialized Byte Sequences
|
-- * Materialized Byte Sequences
|
||||||
, bytes
|
, bytes
|
||||||
|
, copy
|
||||||
|
, insert
|
||||||
, byteArray
|
, byteArray
|
||||||
, shortTextUtf8
|
, shortTextUtf8
|
||||||
, shortTextJsonString
|
, shortTextJsonString
|
||||||
|
@ -62,39 +64,41 @@ module Data.ByteArray.Builder
|
||||||
, int64LE
|
, int64LE
|
||||||
-- *** Many
|
-- *** Many
|
||||||
, word8Array
|
, word8Array
|
||||||
|
-- **** Little Endian
|
||||||
|
, word16ArrayLE
|
||||||
-- ** Prefixing with Length
|
-- ** Prefixing with Length
|
||||||
, consLength32BE
|
, consLength32BE
|
||||||
, consLength64BE
|
, consLength64BE
|
||||||
-- * Encode Floating-Point Types
|
-- * Encode Floating-Point Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, doubleDec
|
, doubleDec
|
||||||
|
-- * Control
|
||||||
|
, flush
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Primitive (primitive_)
|
import Control.Monad.Primitive (primitive_)
|
||||||
import Control.Monad.ST (ST,stToIO,runST)
|
import Control.Monad.ST (ST,runST)
|
||||||
import Control.Monad.ST.Run (runByteArrayST)
|
|
||||||
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
||||||
import Data.ByteArray.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
import Data.ByteArray.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
||||||
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
||||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||||
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
import Data.Bytes.Types (Bytes(Bytes))
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Int (Int64,Int32,Int16,Int8)
|
import Data.Int (Int64,Int32,Int16,Int8)
|
||||||
import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..))
|
import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..))
|
||||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
|
||||||
import Data.Text.Short (ShortText)
|
import Data.Text.Short (ShortText)
|
||||||
import Data.Word (Word64,Word32,Word16,Word8)
|
import Data.Word (Word64,Word32,Word16,Word8)
|
||||||
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,RealWorld,(>=#),(/=#))
|
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,(>=#))
|
||||||
import GHC.Exts (MutableByteArray#,(+#),(-#),(<#))
|
import GHC.Exts (MutableByteArray#,(+#),(-#),(<#))
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
import Data.Bytes.Chunks (Chunks(..))
|
import Data.Bytes.Chunks (Chunks(..))
|
||||||
|
import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder)
|
||||||
|
|
||||||
import qualified Arithmetic.Nat as Nat
|
import qualified Arithmetic.Nat as Nat
|
||||||
import qualified Arithmetic.Types as Arithmetic
|
import qualified Arithmetic.Types as Arithmetic
|
||||||
import qualified GHC.Exts as Exts
|
import qualified GHC.Exts as Exts
|
||||||
import qualified Data.Text.Short as TS
|
import qualified Data.Text.Short as TS
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.Vector as V
|
|
||||||
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
||||||
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
|
|
||||||
|
@ -164,26 +168,74 @@ fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
byteArray :: ByteArray -> Builder
|
byteArray :: ByteArray -> Builder
|
||||||
byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a))
|
||||||
|
|
||||||
-- | Create a builder from a sliced byte sequence.
|
-- | Create a builder from a sliced byte sequence. The variants
|
||||||
|
-- 'copy' and 'insert' provide more control over whether or not
|
||||||
|
-- the byte sequence is copied or aliased. This function is preferred
|
||||||
|
-- when the user does not know the size of the byte sequence.
|
||||||
bytes :: Bytes -> Builder
|
bytes :: Bytes -> Builder
|
||||||
bytes (Bytes (ByteArray src# ) (I# soff# ) slen@(I# slen# )) = Builder
|
bytes (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder
|
||||||
(\buf0 off0 len0 cs0 s0 -> if slen >= 1024
|
-- There are three cases to consider: (1) there is not enough
|
||||||
then case Exts.newByteArray# 0# s0 of
|
-- space and (1a) the chunk is not small or (1b) the chunk is
|
||||||
|
-- small; (2) There is enough space for a copy.
|
||||||
|
(\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of
|
||||||
|
1# -> case slen# >=# 256# of
|
||||||
|
1# -> case Exts.newByteArray# 0# s0 of
|
||||||
(# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #)
|
(# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #)
|
||||||
else case len0 <# slen# of
|
_ -> case Exts.newByteArray# 4080# s0 of
|
||||||
1# -> case Exts.newByteArray# 4080# s0 of
|
|
||||||
(# s1, buf1 #) -> case Exts.copyByteArray# src# soff# buf1 0# slen# s1 of
|
(# s1, buf1 #) -> case Exts.copyByteArray# src# soff# buf1 0# slen# s1 of
|
||||||
s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #)
|
s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #)
|
||||||
_ -> let !s1 = Exts.copyByteArray# src# soff# buf0 off0 slen# s0 in
|
_ -> let !s1 = Exts.copyByteArray# src# soff# buf0 off0 slen# s0 in
|
||||||
(# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #)
|
(# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
-- | Create a builder from a byte sequence. This always results in a
|
||||||
|
-- call to @memcpy@. This is beneficial when the byte sequence is
|
||||||
|
-- known to be small (less than 256 bytes).
|
||||||
|
copy :: Bytes -> Builder
|
||||||
|
copy (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder
|
||||||
|
(\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of
|
||||||
|
1# -> case Exts.newByteArray# newSz s0 of
|
||||||
|
(# s1, buf1 #) -> case Exts.copyByteArray# src# soff# buf1 0# slen# s1 of
|
||||||
|
s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #)
|
||||||
|
_ -> let !s1 = Exts.copyByteArray# src# soff# buf0 off0 slen# s0 in
|
||||||
|
(# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #)
|
||||||
|
)
|
||||||
|
where
|
||||||
|
!(I# newSz) = max (I# slen#) 4080
|
||||||
|
|
||||||
|
-- | Create a builder from a byte sequence. This never calls @memcpy@.
|
||||||
|
-- Instead, it pushes a chunk that references the argument byte sequence.
|
||||||
|
-- This wastes the remaining space in the active chunk, so it may adversely
|
||||||
|
-- affect performance if used carelessly. See 'flush' for a way to mitigate
|
||||||
|
-- this problem. This functions is most beneficial when the byte sequence
|
||||||
|
-- is known to be large (more than 8192 bytes).
|
||||||
|
insert :: Bytes -> Builder
|
||||||
|
insert (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder
|
||||||
|
(\buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of
|
||||||
|
(# s1, buf1 #) ->
|
||||||
|
(# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #)
|
||||||
|
)
|
||||||
|
|
||||||
-- | Create a builder from a slice of an array of 'Word8'. There is the same
|
-- | Create a builder from a slice of an array of 'Word8'. There is the same
|
||||||
-- as 'bytes' but is provided as a convenience for users working with different
|
-- as 'bytes' but is provided as a convenience for users working with different
|
||||||
-- types.
|
-- types.
|
||||||
word8Array :: PrimArray Word8 -> Int -> Int -> Builder
|
word8Array :: PrimArray Word8 -> Int -> Int -> Builder
|
||||||
word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len)
|
word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len)
|
||||||
|
|
||||||
|
word16ArrayLE :: PrimArray Word16 -> Int -> Int -> Builder
|
||||||
|
word16ArrayLE src@(PrimArray arr) soff0 slen0 = case targetByteOrder of
|
||||||
|
LittleEndian -> bytes (Bytes (ByteArray arr) (soff0 * 2) (slen0 * 2))
|
||||||
|
BigEndian -> fromFunction (slen0 * 2) (go soff0 (soff0 + slen0))
|
||||||
|
where
|
||||||
|
go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int
|
||||||
|
go !soff !send !dst !doff = if soff < send
|
||||||
|
then do
|
||||||
|
doff' <- UnsafeBounded.pasteST
|
||||||
|
(Bounded.word16LE (PM.indexPrimArray src soff))
|
||||||
|
dst doff
|
||||||
|
go (soff + 1) send dst doff'
|
||||||
|
else pure doff
|
||||||
|
|
||||||
-- Internal function. Precondition, the referenced slice of the
|
-- Internal function. Precondition, the referenced slice of the
|
||||||
-- byte sequence is UTF-8 encoded text.
|
-- byte sequence is UTF-8 encoded text.
|
||||||
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
||||||
|
@ -502,6 +554,25 @@ commitDistance target !n (Mutable buf len cs) =
|
||||||
1# -> n +# len
|
1# -> n +# len
|
||||||
_ -> commitDistance target (n +# len) cs
|
_ -> commitDistance target (n +# len) cs
|
||||||
|
|
||||||
|
-- | Push the buffer currently being filled onto the chunk list,
|
||||||
|
-- allocating a new active buffer of the requested size. This is
|
||||||
|
-- helpful when a small builder is sandwhiched between two large
|
||||||
|
-- zero-copy builders:
|
||||||
|
--
|
||||||
|
-- > insert bigA <> flush 1 <> word8 0x42 <> insert bigB
|
||||||
|
--
|
||||||
|
-- Without @flush 1@, @word8 0x42@ would see the zero-byte active
|
||||||
|
-- buffer that 'insert' returned, decide that it needed more space,
|
||||||
|
-- and allocate a 4080-byte buffer to which only a single byte
|
||||||
|
-- would be written.
|
||||||
|
flush :: Int -> Builder
|
||||||
|
flush !reqSz = Builder $ \buf0 off0 _ cs0 s0 ->
|
||||||
|
case Exts.newByteArray# sz# s0 of
|
||||||
|
(# sX, bufX #) ->
|
||||||
|
(# sX, bufX, 0#, sz#, Mutable buf0 off0 cs0 #)
|
||||||
|
where
|
||||||
|
!(I# sz# ) = max reqSz 0
|
||||||
|
|
||||||
-- ShortText is already UTF-8 encoded. This is a no-op.
|
-- ShortText is already UTF-8 encoded. This is a no-op.
|
||||||
shortTextToByteArray :: ShortText -> ByteArray
|
shortTextToByteArray :: ShortText -> ByteArray
|
||||||
shortTextToByteArray x = case TS.toShortByteString x of
|
shortTextToByteArray x = case TS.toShortByteString x of
|
||||||
|
|
11
test/Main.hs
11
test/Main.hs
|
@ -23,7 +23,6 @@ import qualified Data.List as L
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Text.Encoding as TE
|
import qualified Data.Text.Encoding as TE
|
||||||
import qualified Data.Vector as V
|
|
||||||
import qualified GHC.Exts as Exts
|
import qualified GHC.Exts as Exts
|
||||||
import qualified Test.Tasty.HUnit as THU
|
import qualified Test.Tasty.HUnit as THU
|
||||||
import qualified Test.Tasty.QuickCheck as TQC
|
import qualified Test.Tasty.QuickCheck as TQC
|
||||||
|
@ -63,7 +62,7 @@ tests = testGroup "Tests"
|
||||||
runConcat 1 (consLength32BE (word8Dec w))
|
runConcat 1 (consLength32BE (word8Dec w))
|
||||||
===
|
===
|
||||||
pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w)
|
||||||
, TQC.testProperty "consLength64BE" $ \w ->
|
, TQC.testProperty "consLength64BE-uni" $ \w ->
|
||||||
pack
|
pack
|
||||||
( '\x00' : '\x00' : '\x00' : '\x00'
|
( '\x00' : '\x00' : '\x00' : '\x00'
|
||||||
: '\x00' : '\x00' : '\x00' : chr (L.length (show w))
|
: '\x00' : '\x00' : '\x00' : chr (L.length (show w))
|
||||||
|
@ -71,6 +70,14 @@ tests = testGroup "Tests"
|
||||||
)
|
)
|
||||||
===
|
===
|
||||||
runConcat 1 (consLength64BE (word16Dec w))
|
runConcat 1 (consLength64BE (word16Dec w))
|
||||||
|
, TQC.testProperty "consLength64BE-multi" $ \w ->
|
||||||
|
pack
|
||||||
|
( '\x00' : '\x00' : '\x00' : '\x00'
|
||||||
|
: '\x00' : '\x00' : '\x00' : chr (1 + L.length (show w))
|
||||||
|
: '\x42' : show w
|
||||||
|
)
|
||||||
|
===
|
||||||
|
runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w))
|
||||||
, THU.testCase "stringUtf8" $
|
, THU.testCase "stringUtf8" $
|
||||||
packUtf8 "¿Cómo estás? I am doing well." @=?
|
packUtf8 "¿Cómo estás? I am doing well." @=?
|
||||||
runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
||||||
|
|
Loading…
Reference in a new issue