add more features
This commit is contained in:
parent
0b1a585add
commit
831c25c81a
4 changed files with 322 additions and 48 deletions
|
@ -1,9 +1,10 @@
|
|||
{-# language ScopedTypeVariables #-}
|
||||
{-# language BangPatterns #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language DuplicateRecordFields #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
|
||||
module Data.ByteArray.Builder.Small
|
||||
( -- * Unsafe Primitives
|
||||
|
@ -13,21 +14,32 @@ module Data.ByteArray.Builder.Small
|
|||
, run
|
||||
, pasteST
|
||||
, pasteIO
|
||||
, pasteGrowST
|
||||
, pasteGrowIO
|
||||
, pasteArrayST
|
||||
, pasteArrayIO
|
||||
-- * Materialized Byte Sequences
|
||||
, bytes
|
||||
, bytearray
|
||||
-- * Numbers
|
||||
, word64Dec
|
||||
, word64PaddedUpperHex
|
||||
) where
|
||||
|
||||
import Control.Monad.Primitive
|
||||
import Control.Monad.ST
|
||||
import Control.Monad.ST.Run (runByteArrayST)
|
||||
import Data.Bytes.Types
|
||||
import Data.Char (ord)
|
||||
import Data.Primitive
|
||||
import GHC.Exts
|
||||
import GHC.ST
|
||||
import GHC.Word
|
||||
import GHC.TypeLits (KnownNat,natVal')
|
||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||
|
||||
import qualified Data.Primitive as PM
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
|
||||
|
||||
-- | An unmaterialized sequence of bytes that may be pasted
|
||||
-- into a mutable byte array.
|
||||
|
@ -41,12 +53,15 @@ instance Semigroup Builder where
|
|||
1# -> g arr r (len0 +# (off0 -# r)) s1
|
||||
_ -> (# s1, (-1#) #)
|
||||
|
||||
instance Monoid Builder where
|
||||
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
||||
|
||||
-- | Run a builder. An accurate size hint is important for good performance.
|
||||
run ::
|
||||
Int -- ^ Hint for upper bound on size
|
||||
-> Builder -- ^ Builder
|
||||
-> ByteArray
|
||||
run hint b = runST $ do
|
||||
run hint b = runByteArrayST $ do
|
||||
let go !n = do
|
||||
arr <- newByteArray n
|
||||
pasteST b (MutableBytes arr 0 n) >>= \case
|
||||
|
@ -56,6 +71,58 @@ run hint b = runST $ do
|
|||
unsafeFreezeByteArray arr
|
||||
go hint
|
||||
|
||||
pasteArrayIO ::
|
||||
MutableBytes RealWorld -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> IO (V.Vector a, MutableBytes RealWorld) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayIO !arr f !xs = stToIO (pasteArrayST arr f xs)
|
||||
|
||||
pasteArrayST ::
|
||||
MutableBytes s -- ^ Buffer
|
||||
-> (a -> Builder) -- ^ Builder
|
||||
-> V.Vector a -- ^ Elements to serialize
|
||||
-> ST s (V.Vector a, MutableBytes s) -- ^ Shifted vector, shifted buffer
|
||||
pasteArrayST (MutableBytes arr off0 len0) f !xs0 = do
|
||||
let go !xs !ixBufA !lenBufA = if V.length xs > 0
|
||||
then do
|
||||
let a = V.unsafeHead xs
|
||||
pasteST (f a) (MutableBytes arr ixBufA lenBufA) >>= \case
|
||||
Nothing -> pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
Just ixBufB ->
|
||||
go (V.unsafeTail xs) ixBufB (lenBufA + (ixBufA - ixBufB))
|
||||
else pure (xs,MutableBytes arr ixBufA lenBufA)
|
||||
go xs0 off0 len0
|
||||
|
||||
-- | Paste the builder into the byte array starting at offset zero.
|
||||
-- This repeatedly reallocates the byte array if it cannot accomodate
|
||||
-- the builder, replaying the builder each time.
|
||||
pasteGrowST ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArray s
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> ST s (MutableByteArrayOffset s)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowST !n b !arr0 = do
|
||||
let go !arr !sz = pasteST b (MutableBytes arr 0 sz) >>= \case
|
||||
Nothing -> do
|
||||
let szNext = sz + n
|
||||
arrNext <- PM.newByteArray szNext
|
||||
go arrNext szNext
|
||||
Just ix -> pure (MutableByteArrayOffset{array=arr,offset=ix})
|
||||
go arr0 =<< PM.getSizeofMutableByteArray arr0
|
||||
|
||||
-- | Variant of 'pasteGrowST' that runs in 'IO'.
|
||||
pasteGrowIO ::
|
||||
Int -- ^ How many bytes to grow by at a time
|
||||
-> Builder
|
||||
-> MutableByteArray RealWorld
|
||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||
-> IO (MutableByteArrayOffset RealWorld)
|
||||
-- ^ Final buffer that accomodated the builder.
|
||||
pasteGrowIO !n b !arr = stToIO (pasteGrowST n b arr)
|
||||
|
||||
pasteST :: Builder -> MutableBytes s -> ST s (Maybe Int)
|
||||
{-# inline pasteST #-}
|
||||
pasteST (Builder f) (MutableBytes (MutableByteArray arr) (I# off) (I# len)) =
|
||||
|
@ -76,6 +143,14 @@ construct f = Builder
|
|||
Nothing -> (# s1, (-1#) #)
|
||||
Just (I# n) -> (# s1, n #)
|
||||
|
||||
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
|
||||
{-# inline fromUnsafe #-}
|
||||
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
|
||||
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
||||
I# req -> case len >=# req of
|
||||
1# -> f arr off s0
|
||||
_ -> (# s0, (-1#) #)
|
||||
|
||||
bytearray :: ByteArray -> Builder
|
||||
bytearray a = bytes (Bytes a 0 (sizeofByteArray a))
|
||||
|
||||
|
@ -87,43 +162,11 @@ bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len
|
|||
else pure Nothing
|
||||
|
||||
word64Dec :: Word64 -> Builder
|
||||
word64Dec (W64# w) = word64Dec# w
|
||||
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
||||
|
||||
word64Dec# :: Word# -> Builder
|
||||
{-# noinline word64Dec# #-}
|
||||
word64Dec# w# = construct $ \(MutableBytes arr off0 len) -> if len >= 19
|
||||
then if w /= 0
|
||||
then do
|
||||
let go off x = if x > 0
|
||||
then do
|
||||
let (y,z) = quotRem x 10
|
||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||
go (off + 1) y
|
||||
else do
|
||||
reverseBytes arr off0 (off - 1)
|
||||
pure (Just off)
|
||||
go off0 w
|
||||
else do
|
||||
writeByteArray arr off0 (c2w '0')
|
||||
pure (Just (off0 + 1))
|
||||
else pure Nothing
|
||||
where
|
||||
w = W64# w#
|
||||
|
||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||
{-# inline reverseBytes #-}
|
||||
reverseBytes arr begin end = go begin end where
|
||||
go ixA ixB = if ixA < ixB
|
||||
then do
|
||||
a :: Word8 <- readByteArray arr ixA
|
||||
b :: Word8 <- readByteArray arr ixB
|
||||
writeByteArray arr ixA b
|
||||
writeByteArray arr ixB a
|
||||
go (ixA + 1) (ixB - 1)
|
||||
else pure ()
|
||||
|
||||
c2w :: Char -> Word8
|
||||
c2w = fromIntegral . ord
|
||||
word64PaddedUpperHex :: Word64 -> Builder
|
||||
word64PaddedUpperHex w =
|
||||
fromUnsafe (Unsafe.word64PaddedUpperHex w)
|
||||
|
||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||
unST (ST f) = f
|
||||
|
|
165
src/Data/ByteArray/Builder/Small/Unsafe.hs
Normal file
165
src/Data/ByteArray/Builder/Small/Unsafe.hs
Normal file
|
@ -0,0 +1,165 @@
|
|||
{-# language GADTSyntax #-}
|
||||
{-# language KindSignatures #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language BangPatterns #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language UnboxedTuples #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language TypeOperators #-}
|
||||
{-# language DataKinds #-}
|
||||
|
||||
-- | The functions in this module do not check to
|
||||
-- see if there is enough space in the buffer.
|
||||
module Data.ByteArray.Builder.Small.Unsafe
|
||||
( Builder(..)
|
||||
, run
|
||||
, pasteST
|
||||
, pasteIO
|
||||
, construct
|
||||
, append
|
||||
, word64Dec
|
||||
, word64PaddedUpperHex
|
||||
) where
|
||||
|
||||
import Control.Monad.Primitive
|
||||
import Control.Monad.ST
|
||||
import Data.Bits
|
||||
import Data.Bytes.Types
|
||||
import Data.Char (ord)
|
||||
import Data.Primitive
|
||||
import GHC.Exts
|
||||
import GHC.ST
|
||||
import GHC.Word
|
||||
import Data.Kind
|
||||
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
||||
|
||||
newtype Builder :: Nat -> Type where
|
||||
Builder ::
|
||||
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
||||
-> Builder n
|
||||
|
||||
run :: forall n. KnownNat n
|
||||
=> Builder n -- ^ Builder
|
||||
-> ByteArray
|
||||
{-# inline run #-}
|
||||
run b = runST $ do
|
||||
arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
|
||||
len <- pasteST b arr 0
|
||||
shrinkMutableByteArray arr len
|
||||
unsafeFreezeByteArray arr
|
||||
|
||||
-- | This function does not enforce the known upper bound on the
|
||||
-- size. It is up to the user to do this.
|
||||
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
|
||||
{-# inline pasteST #-}
|
||||
pasteST (Builder f) (MutableByteArray arr) (I# off) =
|
||||
ST $ \s0 -> case f arr off s0 of
|
||||
(# s1, r #) -> (# s1, (I# r) #)
|
||||
|
||||
-- | This function does not enforce the known upper bound on the
|
||||
-- size. It is up to the user to do this.
|
||||
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
|
||||
{-# inline pasteIO #-}
|
||||
pasteIO b m off = stToIO (pasteST b m off)
|
||||
|
||||
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
|
||||
{-# inline construct #-}
|
||||
construct f = Builder
|
||||
$ \arr off s0 ->
|
||||
case unST (f (MutableByteArray arr) (I# off)) s0 of
|
||||
(# s1, (I# n) #) -> (# s1, n #)
|
||||
|
||||
append :: Builder n -> Builder m -> Builder (n + m)
|
||||
append (Builder f) (Builder g) =
|
||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||
(# s1, r #) -> g arr r s1
|
||||
|
||||
-- | Requires up to 19 bytes.
|
||||
word64Dec :: Word64 -> Builder 19
|
||||
word64Dec (W64# w) = word64Dec# w
|
||||
|
||||
-- | Requires up to 19 bytes.
|
||||
word64Dec# :: Word# -> Builder 19
|
||||
{-# noinline word64Dec# #-}
|
||||
word64Dec# w# = construct $ \arr off0 -> if w /= 0
|
||||
then do
|
||||
let go off x = if x > 0
|
||||
then do
|
||||
let (y,z) = quotRem x 10
|
||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||
go (off + 1) y
|
||||
else do
|
||||
reverseBytes arr off0 (off - 1)
|
||||
pure off
|
||||
go off0 w
|
||||
else do
|
||||
writeByteArray arr off0 (c2w '0')
|
||||
pure (off0 + 1)
|
||||
where
|
||||
w = W64# w#
|
||||
|
||||
-- Convert a number between 0 and 16 to the ASCII
|
||||
-- representation of its hexadecimal character.
|
||||
-- The use of fromIntegral causes us to incur an
|
||||
-- unneeded bitmask. This actually needs a Word64
|
||||
-- argument.
|
||||
toHexUpper :: Word -> Word8
|
||||
toHexUpper w' = fromIntegral
|
||||
$ (complement theMask .&. loSolved)
|
||||
.|. (theMask .&. hiSolved)
|
||||
where
|
||||
w = w' .&. 0xF
|
||||
-- This is all ones if the value was >= 10
|
||||
theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1
|
||||
loSolved = w + 48
|
||||
hiSolved = w + 55
|
||||
|
||||
-- | Requires up to 16 bytes.
|
||||
word64PaddedUpperHex :: Word64 -> Builder 16
|
||||
word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w
|
||||
|
||||
word64PaddedUpperHex# :: Word# -> Builder 16
|
||||
{-# noinline word64PaddedUpperHex# #-}
|
||||
word64PaddedUpperHex# w# = construct $ \arr off -> do
|
||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
|
||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
|
||||
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
|
||||
writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 48))
|
||||
writeByteArray arr (off + 4) (toHexUpper (unsafeShiftR w 44))
|
||||
writeByteArray arr (off + 5) (toHexUpper (unsafeShiftR w 40))
|
||||
writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 36))
|
||||
writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 32))
|
||||
writeByteArray arr (off + 8) (toHexUpper (unsafeShiftR w 28))
|
||||
writeByteArray arr (off + 9) (toHexUpper (unsafeShiftR w 24))
|
||||
writeByteArray arr (off + 10) (toHexUpper (unsafeShiftR w 20))
|
||||
writeByteArray arr (off + 11) (toHexUpper (unsafeShiftR w 16))
|
||||
writeByteArray arr (off + 12) (toHexUpper (unsafeShiftR w 12))
|
||||
writeByteArray arr (off + 13) (toHexUpper (unsafeShiftR w 8))
|
||||
writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4))
|
||||
writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0))
|
||||
pure (off + 16)
|
||||
where
|
||||
w = W# w#
|
||||
|
||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||
{-# inline reverseBytes #-}
|
||||
reverseBytes arr begin end = go begin end where
|
||||
go ixA ixB = if ixA < ixB
|
||||
then do
|
||||
a :: Word8 <- readByteArray arr ixA
|
||||
b :: Word8 <- readByteArray arr ixB
|
||||
writeByteArray arr ixA b
|
||||
writeByteArray arr ixB a
|
||||
go (ixA + 1) (ixB - 1)
|
||||
else pure ()
|
||||
|
||||
c2w :: Char -> Word8
|
||||
c2w = fromIntegral . ord
|
||||
|
||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||
unST (ST f) = f
|
||||
|
||||
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
||||
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||
primitive_ (shrinkMutableByteArray# arr sz)
|
Loading…
Add table
Add a link
Reference in a new issue