250 lines
8.7 KiB
Haskell
250 lines
8.7 KiB
Haskell
{-# language BangPatterns #-}
|
|
{-# language DuplicateRecordFields #-}
|
|
{-# language LambdaCase #-}
|
|
{-# language MagicHash #-}
|
|
{-# language RankNTypes #-}
|
|
{-# language ScopedTypeVariables #-}
|
|
{-# language UnboxedTuples #-}
|
|
|
|
module Data.ByteArray.Builder.Small
|
|
( -- * Unsafe Primitives
|
|
Builder(..)
|
|
, construct
|
|
, fromUnsafe
|
|
-- * Evaluation
|
|
, run
|
|
, pasteST
|
|
, pasteIO
|
|
, pasteGrowST
|
|
, pasteGrowIO
|
|
, pasteArrayST
|
|
, pasteArrayIO
|
|
-- * Materialized Byte Sequences
|
|
, bytes
|
|
, bytearray
|
|
-- * Encode Integral Types
|
|
-- ** Human-Readable
|
|
, word64Dec
|
|
, int64Dec
|
|
, word64PaddedUpperHex
|
|
, word32PaddedUpperHex
|
|
, word16PaddedUpperHex
|
|
, word8PaddedUpperHex
|
|
-- ** Machine-Readable
|
|
, word64BE
|
|
, word32BE
|
|
, word16BE
|
|
) where
|
|
|
|
import Control.Monad.Primitive
|
|
import Control.Monad.ST
|
|
import Control.Monad.ST.Run (runByteArrayST)
|
|
import Data.Bytes.Types
|
|
import Data.Primitive
|
|
import Data.Int (Int64)
|
|
import GHC.Exts
|
|
import GHC.ST
|
|
import GHC.Word
|
|
import GHC.TypeLits (KnownNat,natVal')
|
|
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
|
|
|
import qualified Data.Primitive as PM
|
|
import qualified Data.Vector as V
|
|
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
|
|
|
|
-- | An unmaterialized sequence of bytes that may be pasted
|
|
-- into a mutable byte array.
|
|
newtype Builder = Builder
|
|
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
|
|
|
|
instance Semigroup Builder where
|
|
{-# inline (<>) #-}
|
|
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
|
|
(# s1, r #) -> case r /=# (-1#) of
|
|
1# -> g arr r (len0 +# (off0 -# r)) s1
|
|
_ -> (# s1, (-1#) #)
|
|
|
|
instance Monoid Builder where
|
|
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
|
|
|
-- | Run a builder. An accurate size hint is important for good performance.
|
|
-- The size hint should be slightly larger than the actual size.
|
|
run ::
|
|
Int -- ^ Hint for upper bound on size
|
|
-> Builder -- ^ Builder
|
|
-> ByteArray
|
|
run hint b = runByteArrayST $ do
|
|
let go !n = do
|
|
arr <- newByteArray n
|
|
pasteST b (MutableBytes arr 0 n) >>= \case
|
|
Nothing -> go (n + 64)
|
|
Just len -> do
|
|
shrinkMutableByteArray arr len
|
|
unsafeFreezeByteArray arr
|
|
go hint
|
|
|
|
-- | Variant of 'pasteArrayST' that runs in 'IO'.
|
|
pasteArrayIO ::
|
|
MutableBytes RealWorld -- ^ Buffer
|
|
-> (a -> Builder) -- ^ Builder
|
|
-> V.Vector a -- ^ Elements to serialize
|
|
-> IO (V.Vector a, MutableBytes RealWorld) -- ^ Shifted vector, shifted buffer
|
|
pasteArrayIO !arr f !xs = stToIO (pasteArrayST arr f xs)
|
|
|
|
-- | Fold over a vector, applying the builder to each element until
|
|
-- the buffer cannot accomodate any more.
|
|
pasteArrayST ::
|
|
MutableBytes s -- ^ Buffer
|
|
-> (a -> Builder) -- ^ Builder
|
|
-> V.Vector a -- ^ Elements to serialize
|
|
-> ST s (V.Vector a, MutableBytes s) -- ^ Shifted vector, shifted buffer
|
|
pasteArrayST (MutableBytes arr off0 len0) f !xs0 = do
|
|
let go !xs !ixBufA !lenBufA = if V.length xs > 0
|
|
then do
|
|
let a = V.unsafeHead xs
|
|
pasteST (f a) (MutableBytes arr ixBufA lenBufA) >>= \case
|
|
Nothing -> pure (xs,MutableBytes arr ixBufA lenBufA)
|
|
Just ixBufB ->
|
|
go (V.unsafeTail xs) ixBufB (lenBufA + (ixBufA - ixBufB))
|
|
else pure (xs,MutableBytes arr ixBufA lenBufA)
|
|
go xs0 off0 len0
|
|
|
|
-- | Paste the builder into the byte array starting at offset zero.
|
|
-- This repeatedly reallocates the byte array if it cannot accomodate
|
|
-- the builder, replaying the builder each time.
|
|
pasteGrowST ::
|
|
Int -- ^ How many bytes to grow by at a time
|
|
-> Builder
|
|
-> MutableByteArray s
|
|
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
|
-> ST s (MutableByteArrayOffset s)
|
|
-- ^ Final buffer that accomodated the builder.
|
|
pasteGrowST !n b !arr0 = do
|
|
let go !arr !sz = pasteST b (MutableBytes arr 0 sz) >>= \case
|
|
Nothing -> do
|
|
let szNext = sz + n
|
|
arrNext <- PM.newByteArray szNext
|
|
go arrNext szNext
|
|
Just ix -> pure (MutableByteArrayOffset{array=arr,offset=ix})
|
|
go arr0 =<< PM.getSizeofMutableByteArray arr0
|
|
|
|
-- | Variant of 'pasteGrowST' that runs in 'IO'.
|
|
pasteGrowIO ::
|
|
Int -- ^ How many bytes to grow by at a time
|
|
-> Builder
|
|
-> MutableByteArray RealWorld
|
|
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
|
-> IO (MutableByteArrayOffset RealWorld)
|
|
-- ^ Final buffer that accomodated the builder.
|
|
pasteGrowIO !n b !arr = stToIO (pasteGrowST n b arr)
|
|
|
|
-- | Execute the builder, pasting its contents into a buffer.
|
|
-- If the buffer is not large enough, this returns 'Nothing'.
|
|
-- Otherwise, it returns the index in the buffer that follows
|
|
-- the payload just written.
|
|
pasteST :: Builder -> MutableBytes s -> ST s (Maybe Int)
|
|
{-# inline pasteST #-}
|
|
pasteST (Builder f) (MutableBytes (MutableByteArray arr) (I# off) (I# len)) =
|
|
ST $ \s0 -> case f arr off len s0 of
|
|
(# s1, r #) -> if isTrue# (r /=# (-1#))
|
|
then (# s1, Just (I# r) #)
|
|
else (# s1, Nothing #)
|
|
|
|
-- | Variant of 'pasteST' that runs in 'IO'.
|
|
pasteIO :: Builder -> MutableBytes RealWorld -> IO (Maybe Int)
|
|
{-# inline pasteIO #-}
|
|
pasteIO b m = stToIO (pasteST b m)
|
|
|
|
-- | Constructor for 'Builder' that works on a function with lifted
|
|
-- arguments instead of unlifted ones. This is just as unsafe as the
|
|
-- actual constructor.
|
|
construct :: (forall s. MutableBytes s -> ST s (Maybe Int)) -> Builder
|
|
construct f = Builder
|
|
$ \arr off len s0 ->
|
|
case unST (f (MutableBytes (MutableByteArray arr) (I# off) (I# len))) s0 of
|
|
(# s1, m #) -> case m of
|
|
Nothing -> (# s1, (-1#) #)
|
|
Just (I# n) -> (# s1, n #)
|
|
|
|
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
|
|
{-# inline fromUnsafe #-}
|
|
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
|
|
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
|
I# req -> case len >=# req of
|
|
1# -> f arr off s0
|
|
_ -> (# s0, (-1#) #)
|
|
|
|
-- | Create a builder from an unsliced byte sequence.
|
|
bytearray :: ByteArray -> Builder
|
|
bytearray a = bytes (Bytes a 0 (sizeofByteArray a))
|
|
|
|
-- | Create a builder from a sliced byte sequence.
|
|
bytes :: Bytes -> Builder
|
|
bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len >= slen
|
|
then do
|
|
copyByteArray arr off src soff slen
|
|
pure (Just (len - slen))
|
|
else pure Nothing
|
|
|
|
-- | Encodes an unsigned 64-bit integer as decimal.
|
|
-- This encoding never starts with a zero unless the
|
|
-- argument was zero.
|
|
word64Dec :: Word64 -> Builder
|
|
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
|
|
|
-- | Encodes a signed 64-bit integer as decimal.
|
|
-- This encoding never starts with a zero unless the argument was zero.
|
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
|
-- are not preceded by anything.
|
|
int64Dec :: Int64 -> Builder
|
|
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
|
|
|
|
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
|
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
|
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
|
word64PaddedUpperHex :: Word64 -> Builder
|
|
word64PaddedUpperHex w =
|
|
fromUnsafe (Unsafe.word64PaddedUpperHex w)
|
|
|
|
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
|
|
-- the encoding to 8 digits. This uses uppercase for the alphabetical
|
|
-- digits. For example, this encodes the number 1022 as @000003FE@.
|
|
word32PaddedUpperHex :: Word32 -> Builder
|
|
word32PaddedUpperHex w =
|
|
fromUnsafe (Unsafe.word32PaddedUpperHex w)
|
|
|
|
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
|
|
-- the encoding to 4 digits. This uses uppercase for the alphabetical
|
|
-- digits. For example, this encodes the number 1022 as @03FE@.
|
|
word16PaddedUpperHex :: Word16 -> Builder
|
|
word16PaddedUpperHex w =
|
|
fromUnsafe (Unsafe.word16PaddedUpperHex w)
|
|
|
|
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
|
|
-- the encoding to 2 digits. This uses uppercase for the alphabetical
|
|
-- digits. For example, this encodes the number 11 as @0B@.
|
|
word8PaddedUpperHex :: Word8 -> Builder
|
|
word8PaddedUpperHex w =
|
|
fromUnsafe (Unsafe.word8PaddedUpperHex w)
|
|
|
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
|
unST (ST f) = f
|
|
|
|
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
|
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
|
primitive_ (shrinkMutableByteArray# arr sz)
|
|
|
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
|
-- word in a big-endian fashion.
|
|
word64BE :: Word64 -> Builder
|
|
word64BE w = fromUnsafe (Unsafe.word64BE w)
|
|
|
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
|
-- word in a big-endian fashion.
|
|
word32BE :: Word32 -> Builder
|
|
word32BE w = fromUnsafe (Unsafe.word32BE w)
|
|
|
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
|
-- word in a big-endian fashion.
|
|
word16BE :: Word16 -> Builder
|
|
word16BE w = fromUnsafe (Unsafe.word16BE w)
|