add more features
This commit is contained in:
parent
0b1a585add
commit
831c25c81a
4 changed files with 322 additions and 48 deletions
|
@ -1,10 +1,26 @@
|
|||
cabal-version: 2.2
|
||||
name: small-bytearray-builder
|
||||
version: 0.1.0.0
|
||||
-- synopsis:
|
||||
-- description:
|
||||
synopsis: Serialize to a small byte arrays
|
||||
description:
|
||||
This is similar to the builder facilities provided by
|
||||
`Data.ByteString.Builder`. It is intended to be used in
|
||||
situations where the following apply:
|
||||
.
|
||||
* An individual entity will be serialized as a small
|
||||
number of bytes (less than 512).
|
||||
.
|
||||
* A large number (more than 32) of entities will be serialized
|
||||
one after another without anything between them.
|
||||
.
|
||||
Unlike builders from the `bytestring` package, these builders
|
||||
do not track their state when they run out of space. A builder
|
||||
that runs out of space simply aborts and is rerun at the beginning
|
||||
of the next chunk. This strategy for building is suitable for most
|
||||
CSVs and several line protocols (carbon, InfluxDB, etc.).
|
||||
|
||||
homepage: https://github.com/andrewthad/small-bytearray-builder
|
||||
-- bug-reports:
|
||||
bug-reports: https://github.com/andrewthad/small-bytearray-builder/issues
|
||||
license: BSD-3-Clause
|
||||
license-file: LICENSE
|
||||
author: Andrew Martin
|
||||
|
@ -13,16 +29,28 @@ copyright: 2019 Andrew Martin
|
|||
category: Data
|
||||
extra-source-files: CHANGELOG.md
|
||||
|
||||
flag checked
|
||||
manual: True
|
||||
description: Add bounds-checking to primitive array operations
|
||||
default: False
|
||||
|
||||
library
|
||||
exposed-modules:
|
||||
Data.ByteArray.Builder.Small
|
||||
Data.ByteArray.Builder.Small.Unsafe
|
||||
-- other-modules:
|
||||
-- other-extensions:
|
||||
build-depends:
|
||||
base >=4.12.0.0 && <5
|
||||
, primitive >=0.7 && <0.8
|
||||
, byteslice >=0.1 && <0.2
|
||||
ghc-options: -Wall -O2 -ddump-to-file -ddump-simpl
|
||||
, primitive-offset >=0.2 && <0.3
|
||||
, run-st >=0.1 && <0.2
|
||||
, vector >=0.12.0.3 && <0.13
|
||||
if flag(checked)
|
||||
build-depends: primitive-checked >= 0.7 && <0.8
|
||||
else
|
||||
build-depends: primitive >= 0.7 && <0.8
|
||||
ghc-options: -Wall -O2 -ddump-to-file -ddump-simpl -dsuppress-all
|
||||
hs-source-dirs: src
|
||||
default-language: Haskell2010
|
||||
|
||||
|
@ -33,8 +61,10 @@ test-suite test
|
|||
main-is: Main.hs
|
||||
build-depends:
|
||||
base >=4.12.0.0 && <5
|
||||
, byteslice
|
||||
, small-bytearray-builder
|
||||
, QuickCheck >=2.13.1 && <2.14
|
||||
, tasty-quickcheck >=0.10.1 && <0.11
|
||||
, tasty >=1.2.3 && <1.3
|
||||
, primitive
|
||||
, vector
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
{-# language ScopedTypeVariables #-}
|
||||
{-# language BangPatterns #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language DuplicateRecordFields #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
|
||||
module Data.ByteArray.Builder.Small
|
||||
( -- * Unsafe Primitives
|
||||
|
@ -13,21 +14,32 @@ module Data.ByteArray.Builder.Small
|
|||
, run
|
||||
, pasteST
|
||||
, pasteIO
|
||||
, pasteGrowST
|
||||
, pasteGrowIO
|
||||
, pasteArrayST
|
||||
, pasteArrayIO
|
||||
-- * Materialized Byte Sequences
|
||||
, bytes
|
||||
, bytearray
|
||||
-- * Numbers
|
||||
, word64Dec
|
||||
, word64PaddedUpperHex
|
||||
) where
|
||||
|
||||
import Control.Monad.Primitive
|
||||
import Control.Monad.ST
|
||||
import Control.Monad.ST.Run (runByteArrayST)
|
||||
import Data.Bytes.Types
|
||||
import Data.Char (ord)
|
||||
import Data.Primitive
|
||||
import GHC.Exts
|
||||
import GHC.ST
|
||||
import GHC.Word
|
||||
import GHC.TypeLits (KnownNat,natVal')
|
||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||
|
||||
import qualified Data.Primitive as PM
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
|
||||
|
||||
-- | An unmaterialized sequence of bytes that may be pasted
|
||||
-- into a mutable byte array.
|
||||
|
@ -41,12 +53,15 @@ instance Semigroup Builder where
|
|||
1# -> g arr r (len0 +# (off0 -# r)) s1
|
||||
_ -> (# s1, (-1#) #)
|
||||
|
||||
instance Monoid Builder where
|
||||
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
||||
|
||||
-- | Run a builder. An accurate size hint is important for good performance.
|
||||
run ::
|
||||
Int -- ^ Hint for upper bound on size
|
||||
-> Builder -- ^ Builder
|
||||
-> ByteArray
|
||||
run hint b = runST $ do
|
||||
run hint b = runByteArrayST $ do
|
||||
let go !n = do
|
||||
arr <- newByteArray n
|
||||
pasteST b (MutableBytes arr 0 n) >>= \case
|
||||
|
@ -56,6 +71,58 @@ run hint b = runST $ do
|
|||
unsafeFreezeByteArray arr
|
||||
go hint
|
||||
|
||||
pasteArrayIO ::
|
||||
MutableBytes RealWorld -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> IO (V.Vector a, MutableBytes RealWorld) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayIO !arr f !xs = stToIO (pasteArrayST arr f xs)
|
||||
|
||||
pasteArrayST ::
|
||||
MutableBytes s -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> ST s (V.Vector a, MutableBytes s) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayST (MutableBytes arr off0 len0) f !xs0 = do
|
||||
let go !xs !ixBufA !lenBufA = if V.length xs > 0
|
||||
then do
|
||||
let a = V.unsafeHead xs
|
||||
pasteST (f a) (MutableBytes arr ixBufA lenBufA) >>= \case
|
||||
Nothing -> pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
Just ixBufB ->
|
||||
go (V.unsafeTail xs) ixBufB (lenBufA + (ixBufA - ixBufB))
|
||||
else pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
go xs0 off0 len0
|
||||
|
||||
-- | Paste the builder into the byte array starting at offset zero.
|
||||
-- This repeatedly reallocates the byte array if it cannot accomodate
|
||||
-- the builder, replaying the builder each time.
|
||||
pasteGrowST ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArray s
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> ST s (MutableByteArrayOffset s)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowST !n b !arr0 = do
|
||||
let go !arr !sz = pasteST b (MutableBytes arr 0 sz) >>= \case
|
||||
Nothing -> do
|
||||
let szNext = sz + n
|
||||
arrNext <- PM.newByteArray szNext
|
||||
go arrNext szNext
|
||||
Just ix -> pure (MutableByteArrayOffset{array=arr,offset=ix})
|
||||
go arr0 =<< PM.getSizeofMutableByteArray arr0
|
||||
|
||||
-- | Variant of 'pasteGrowST' that runs in 'IO'.
|
||||
pasteGrowIO ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArray RealWorld
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> IO (MutableByteArrayOffset RealWorld)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowIO !n b !arr = stToIO (pasteGrowST n b arr)
|
||||
|
||||
pasteST :: Builder -> MutableBytes s -> ST s (Maybe Int)
|
||||
{-# inline pasteST #-}
|
||||
pasteST (Builder f) (MutableBytes (MutableByteArray arr) (I# off) (I# len)) =
|
||||
|
@ -76,6 +143,14 @@ construct f = Builder
|
|||
Nothing -> (# s1, (-1#) #)
|
||||
Just (I# n) -> (# s1, n #)
|
||||
|
||||
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
|
||||
{-# inline fromUnsafe #-}
|
||||
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
|
||||
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
||||
I# req -> case len >=# req of
|
||||
1# -> f arr off s0
|
||||
_ -> (# s0, (-1#) #)
|
||||
|
||||
bytearray :: ByteArray -> Builder
|
||||
bytearray a = bytes (Bytes a 0 (sizeofByteArray a))
|
||||
|
||||
|
@ -87,43 +162,11 @@ bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len
|
|||
else pure Nothing
|
||||
|
||||
word64Dec :: Word64 -> Builder
|
||||
word64Dec (W64# w) = word64Dec# w
|
||||
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
||||
|
||||
word64Dec# :: Word# -> Builder
|
||||
{-# noinline word64Dec# #-}
|
||||
word64Dec# w# = construct $ \(MutableBytes arr off0 len) -> if len >= 19
|
||||
then if w /= 0
|
||||
then do
|
||||
let go off x = if x > 0
|
||||
then do
|
||||
let (y,z) = quotRem x 10
|
||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||
go (off + 1) y
|
||||
else do
|
||||
reverseBytes arr off0 (off - 1)
|
||||
pure (Just off)
|
||||
go off0 w
|
||||
else do
|
||||
writeByteArray arr off0 (c2w '0')
|
||||
pure (Just (off0 + 1))
|
||||
else pure Nothing
|
||||
where
|
||||
w = W64# w#
|
||||
|
||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||
{-# inline reverseBytes #-}
|
||||
reverseBytes arr begin end = go begin end where
|
||||
go ixA ixB = if ixA < ixB
|
||||
then do
|
||||
a :: Word8 <- readByteArray arr ixA
|
||||
b :: Word8 <- readByteArray arr ixB
|
||||
writeByteArray arr ixA b
|
||||
writeByteArray arr ixB a
|
||||
go (ixA + 1) (ixB - 1)
|
||||
else pure ()
|
||||
|
||||
c2w :: Char -> Word8
|
||||
c2w = fromIntegral . ord
|
||||
word64PaddedUpperHex :: Word64 -> Builder
|
||||
word64PaddedUpperHex w =
|
||||
fromUnsafe (Unsafe.word64PaddedUpperHex w)
|
||||
|
||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||
unST (ST f) = f
|
||||
|
|
165
src/Data/ByteArray/Builder/Small/Unsafe.hs
Normal file
165
src/Data/ByteArray/Builder/Small/Unsafe.hs
Normal file
|
@ -0,0 +1,165 @@
|
|||
{-# language GADTSyntax #-}
|
||||
{-# language KindSignatures #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language BangPatterns #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language TypeOperators #-}
|
||||
{-# language DataKinds #-}
|
||||
|
||||
-- | The functions in this module do not check to
|
||||
-- see if there is enough space in the buffer.
|
||||
module Data.ByteArray.Builder.Small.Unsafe
|
||||
( Builder(..)
|
||||
, run
|
||||
, pasteST
|
||||
, pasteIO
|
||||
, construct
|
||||
, append
|
||||
, word64Dec
|
||||
, word64PaddedUpperHex
|
||||
) where
|
||||
|
||||
import Control.Monad.Primitive
|
||||
import Control.Monad.ST
|
||||
import Data.Bits
|
||||
import Data.Bytes.Types
|
||||
import Data.Char (ord)
|
||||
import Data.Primitive
|
||||
import GHC.Exts
|
||||
import GHC.ST
|
||||
import GHC.Word
|
||||
import Data.Kind
|
||||
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
||||
|
||||
newtype Builder :: Nat -> Type where
|
||||
Builder ::
|
||||
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
||||
-> Builder n
|
||||
|
||||
run :: forall n. KnownNat n
|
||||
=> Builder n -- ^ Builder
|
||||
-> ByteArray
|
||||
{-# inline run #-}
|
||||
run b = runST $ do
|
||||
arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
|
||||
len <- pasteST b arr 0
|
||||
shrinkMutableByteArray arr len
|
||||
unsafeFreezeByteArray arr
|
||||
|
||||
-- | This function does not enforce the known upper bound on the
|
||||
-- size. It is up to the user to do this.
|
||||
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
|
||||
{-# inline pasteST #-}
|
||||
pasteST (Builder f) (MutableByteArray arr) (I# off) =
|
||||
ST $ \s0 -> case f arr off s0 of
|
||||
(# s1, r #) -> (# s1, (I# r) #)
|
||||
|
||||
-- | This function does not enforce the known upper bound on the
|
||||
-- size. It is up to the user to do this.
|
||||
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
|
||||
{-# inline pasteIO #-}
|
||||
pasteIO b m off = stToIO (pasteST b m off)
|
||||
|
||||
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
|
||||
{-# inline construct #-}
|
||||
construct f = Builder
|
||||
$ \arr off s0 ->
|
||||
case unST (f (MutableByteArray arr) (I# off)) s0 of
|
||||
(# s1, (I# n) #) -> (# s1, n #)
|
||||
|
||||
append :: Builder n -> Builder m -> Builder (n + m)
|
||||
append (Builder f) (Builder g) =
|
||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||
(# s1, r #) -> g arr r s1
|
||||
|
||||
-- | Requires up to 19 bytes.
|
||||
word64Dec :: Word64 -> Builder 19
|
||||
word64Dec (W64# w) = word64Dec# w
|
||||
|
||||
-- | Requires up to 19 bytes.
|
||||
word64Dec# :: Word# -> Builder 19
|
||||
{-# noinline word64Dec# #-}
|
||||
word64Dec# w# = construct $ \arr off0 -> if w /= 0
|
||||
then do
|
||||
let go off x = if x > 0
|
||||
then do
|
||||
let (y,z) = quotRem x 10
|
||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||
go (off + 1) y
|
||||
else do
|
||||
reverseBytes arr off0 (off - 1)
|
||||
pure off
|
||||
go off0 w
|
||||
else do
|
||||
writeByteArray arr off0 (c2w '0')
|
||||
pure (off0 + 1)
|
||||
where
|
||||
w = W64# w#
|
||||
|
||||
-- Convert a number between 0 and 16 to the ASCII
|
||||
-- representation of its hexadecimal character.
|
||||
-- The use of fromIntegral causes us to incur an
|
||||
-- unneeded bitmask. This actually needs a Word64
|
||||
-- argument.
|
||||
toHexUpper :: Word -> Word8
|
||||
toHexUpper w' = fromIntegral
|
||||
$ (complement theMask .&. loSolved)
|
||||
.|. (theMask .&. hiSolved)
|
||||
where
|
||||
w = w' .&. 0xF
|
||||
-- This is all ones if the value was >= 10
|
||||
theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1
|
||||
loSolved = w + 48
|
||||
hiSolved = w + 55
|
||||
|
||||
-- | Requires up to 16 bytes.
|
||||
word64PaddedUpperHex :: Word64 -> Builder 16
|
||||
word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w
|
||||
|
||||
word64PaddedUpperHex# :: Word# -> Builder 16
|
||||
{-# noinline word64PaddedUpperHex# #-}
|
||||
word64PaddedUpperHex# w# = construct $ \arr off -> do
|
||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
|
||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
|
||||
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
|
||||
writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 48))
|
||||
writeByteArray arr (off + 4) (toHexUpper (unsafeShiftR w 44))
|
||||
writeByteArray arr (off + 5) (toHexUpper (unsafeShiftR w 40))
|
||||
writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 36))
|
||||
writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 32))
|
||||
writeByteArray arr (off + 8) (toHexUpper (unsafeShiftR w 28))
|
||||
writeByteArray arr (off + 9) (toHexUpper (unsafeShiftR w 24))
|
||||
writeByteArray arr (off + 10) (toHexUpper (unsafeShiftR w 20))
|
||||
writeByteArray arr (off + 11) (toHexUpper (unsafeShiftR w 16))
|
||||
writeByteArray arr (off + 12) (toHexUpper (unsafeShiftR w 12))
|
||||
writeByteArray arr (off + 13) (toHexUpper (unsafeShiftR w 8))
|
||||
writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4))
|
||||
writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0))
|
||||
pure (off + 16)
|
||||
where
|
||||
w = W# w#
|
||||
|
||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||
{-# inline reverseBytes #-}
|
||||
reverseBytes arr begin end = go begin end where
|
||||
go ixA ixB = if ixA < ixB
|
||||
then do
|
||||
a :: Word8 <- readByteArray arr ixA
|
||||
b :: Word8 <- readByteArray arr ixB
|
||||
writeByteArray arr ixA b
|
||||
writeByteArray arr ixB a
|
||||
go (ixA + 1) (ixB - 1)
|
||||
else pure ()
|
||||
|
||||
c2w :: Char -> Word8
|
||||
c2w = fromIntegral . ord
|
||||
|
||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||
unST (ST f) = f
|
||||
|
||||
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
||||
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||
primitive_ (shrinkMutableByteArray# arr sz)
|
36
test/Main.hs
36
test/Main.hs
|
@ -1,11 +1,20 @@
|
|||
{-# language BangPatterns #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language TypeApplications #-}
|
||||
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Bytes.Types (MutableBytes(..))
|
||||
import Data.ByteArray.Builder.Small
|
||||
import Data.Word
|
||||
import Data.Char (ord)
|
||||
import Data.Primitive (ByteArray)
|
||||
import Debug.Trace
|
||||
import Test.Tasty (defaultMain,testGroup,TestTree)
|
||||
import Test.QuickCheck ((===))
|
||||
import Text.Printf (printf)
|
||||
import qualified Data.Primitive as PM
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Vector as V
|
||||
import qualified Test.Tasty.QuickCheck as TQC
|
||||
import qualified Test.QuickCheck as QC
|
||||
import qualified GHC.Exts as Exts
|
||||
|
@ -21,8 +30,35 @@ tests = testGroup "Tests"
|
|||
run 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
||||
===
|
||||
pack (show x ++ show y ++ show z)
|
||||
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
||||
run 1 (word64PaddedUpperHex w)
|
||||
===
|
||||
pack (showWord64PaddedUpperHex w)
|
||||
, TQC.testProperty "pasteArrayST" $ \(xs :: [Word64]) ->
|
||||
(runArray word64Dec (V.fromList xs))
|
||||
===
|
||||
pack (foldMap show xs)
|
||||
]
|
||||
|
||||
pack :: String -> ByteArray
|
||||
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
||||
|
||||
-- This is used to test pasteArrayST
|
||||
runArray ::
|
||||
(a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> ByteArray -- ^ Number of elements serialized, serialization
|
||||
runArray f !xs = runST $ do
|
||||
let go !v0 !sz !chunks = if V.null v0
|
||||
then pure (mconcat (L.reverse chunks))
|
||||
else do
|
||||
arr <- PM.newByteArray sz
|
||||
(v1,MutableBytes _ off _) <- pasteArrayST (MutableBytes arr 0 sz) f v0
|
||||
-- If nothing was serialized, we need a bigger buffer
|
||||
let szNext = if V.length v0 == V.length v1 then sz + 1 else sz
|
||||
c <- PM.unsafeFreezeByteArray =<< PM.resizeMutableByteArray arr off
|
||||
go v1 szNext (c : chunks)
|
||||
go xs 1 []
|
||||
|
||||
showWord64PaddedUpperHex :: Word64 -> String
|
||||
showWord64PaddedUpperHex = printf "%016X"
|
||||
|
|
Loading…
Reference in a new issue