add functions for big-endian encodings, add functions for decimal representation of signed 64-bit integers
This commit is contained in:
parent
5f9ff64a51
commit
f7c42ba6be
4 changed files with 109 additions and 2 deletions
|
@ -60,6 +60,7 @@ test-suite test
|
||||||
build-depends:
|
build-depends:
|
||||||
, base >=4.12.0.0 && <5
|
, base >=4.12.0.0 && <5
|
||||||
, byteslice
|
, byteslice
|
||||||
|
, bytestring
|
||||||
, small-bytearray-builder
|
, small-bytearray-builder
|
||||||
, QuickCheck >=2.13.1 && <2.14
|
, QuickCheck >=2.13.1 && <2.14
|
||||||
, tasty-quickcheck >=0.10.1 && <0.11
|
, tasty-quickcheck >=0.10.1 && <0.11
|
||||||
|
|
|
@ -21,10 +21,16 @@ module Data.ByteArray.Builder.Small
|
||||||
-- * Materialized Byte Sequences
|
-- * Materialized Byte Sequences
|
||||||
, bytes
|
, bytes
|
||||||
, bytearray
|
, bytearray
|
||||||
-- * Numbers
|
-- * Encode Integral Types
|
||||||
|
-- ** Human-Readable
|
||||||
, word64Dec
|
, word64Dec
|
||||||
|
, int64Dec
|
||||||
, word64PaddedUpperHex
|
, word64PaddedUpperHex
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
|
-- ** Machine-Readable
|
||||||
|
, word64BE
|
||||||
|
, word32BE
|
||||||
|
, word16BE
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Primitive
|
import Control.Monad.Primitive
|
||||||
|
@ -32,6 +38,7 @@ import Control.Monad.ST
|
||||||
import Control.Monad.ST.Run (runByteArrayST)
|
import Control.Monad.ST.Run (runByteArrayST)
|
||||||
import Data.Bytes.Types
|
import Data.Bytes.Types
|
||||||
import Data.Primitive
|
import Data.Primitive
|
||||||
|
import Data.Int (Int64)
|
||||||
import GHC.Exts
|
import GHC.Exts
|
||||||
import GHC.ST
|
import GHC.ST
|
||||||
import GHC.Word
|
import GHC.Word
|
||||||
|
@ -182,6 +189,13 @@ bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len
|
||||||
word64Dec :: Word64 -> Builder
|
word64Dec :: Word64 -> Builder
|
||||||
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
||||||
|
|
||||||
|
-- | Encodes a signed 64-bit integer as decimal.
|
||||||
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
|
-- are not preceded by anything.
|
||||||
|
int64Dec :: Int64 -> Builder
|
||||||
|
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
|
||||||
|
|
||||||
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
||||||
|
@ -203,3 +217,17 @@ shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
||||||
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||||
primitive_ (shrinkMutableByteArray# arr sz)
|
primitive_ (shrinkMutableByteArray# arr sz)
|
||||||
|
|
||||||
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word64BE :: Word64 -> Builder
|
||||||
|
word64BE w = fromUnsafe (Unsafe.word64BE w)
|
||||||
|
|
||||||
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word32BE :: Word32 -> Builder
|
||||||
|
word32BE w = fromUnsafe (Unsafe.word32BE w)
|
||||||
|
|
||||||
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word16BE :: Word16 -> Builder
|
||||||
|
word16BE w = fromUnsafe (Unsafe.word16BE w)
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
{-# language LambdaCase #-}
|
{-# language LambdaCase #-}
|
||||||
{-# language TypeOperators #-}
|
{-# language TypeOperators #-}
|
||||||
{-# language DataKinds #-}
|
{-# language DataKinds #-}
|
||||||
|
{-# language TypeApplications #-}
|
||||||
|
|
||||||
-- | The functions in this module do not check to
|
-- | The functions in this module do not check to
|
||||||
-- see if there is enough space in the buffer.
|
-- see if there is enough space in the buffer.
|
||||||
|
@ -22,20 +23,26 @@ module Data.ByteArray.Builder.Small.Unsafe
|
||||||
-- * Combine
|
-- * Combine
|
||||||
, append
|
, append
|
||||||
-- * Encode Integral Types
|
-- * Encode Integral Types
|
||||||
|
-- ** Human-Readable
|
||||||
, word64Dec
|
, word64Dec
|
||||||
|
, int64Dec
|
||||||
, word64PaddedUpperHex
|
, word64PaddedUpperHex
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
|
-- ** Machine-Readable
|
||||||
|
, word64BE
|
||||||
|
, word32BE
|
||||||
|
, word16BE
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Primitive
|
import Control.Monad.Primitive
|
||||||
import Control.Monad.ST
|
import Control.Monad.ST
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
import Data.Bytes.Types
|
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Primitive
|
import Data.Primitive
|
||||||
import GHC.Exts
|
import GHC.Exts
|
||||||
import GHC.ST
|
import GHC.ST
|
||||||
import GHC.Word
|
import GHC.Word
|
||||||
|
import GHC.Int
|
||||||
import Data.Kind
|
import Data.Kind
|
||||||
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
||||||
|
|
||||||
|
@ -92,6 +99,13 @@ append (Builder f) (Builder g) =
|
||||||
word64Dec :: Word64 -> Builder 19
|
word64Dec :: Word64 -> Builder 19
|
||||||
word64Dec (W64# w) = word64Dec# w
|
word64Dec (W64# w) = word64Dec# w
|
||||||
|
|
||||||
|
-- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal.
|
||||||
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
|
-- are not preceded by anything.
|
||||||
|
int64Dec :: Int64 -> Builder 20
|
||||||
|
int64Dec (I64# w) = int64Dec# w
|
||||||
|
|
||||||
-- | Requires up to 19 bytes.
|
-- | Requires up to 19 bytes.
|
||||||
word64Dec# :: Word# -> Builder 19
|
word64Dec# :: Word# -> Builder 19
|
||||||
{-# noinline word64Dec# #-}
|
{-# noinline word64Dec# #-}
|
||||||
|
@ -112,6 +126,32 @@ word64Dec# w# = construct $ \arr off0 -> if w /= 0
|
||||||
where
|
where
|
||||||
w = W64# w#
|
w = W64# w#
|
||||||
|
|
||||||
|
internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||||
|
{-# inline internalWordLoop #-}
|
||||||
|
internalWordLoop arr off0 x0 = go off0 x0 where
|
||||||
|
go !off !(x :: Word) = if x > 0
|
||||||
|
then do
|
||||||
|
let (y,z) = quotRem x 10
|
||||||
|
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||||
|
go (off + 1) y
|
||||||
|
else do
|
||||||
|
reverseBytes arr off0 (off - 1)
|
||||||
|
pure off
|
||||||
|
|
||||||
|
-- | Requires up to 19 bytes.
|
||||||
|
int64Dec# :: Int# -> Builder 20
|
||||||
|
{-# noinline int64Dec# #-}
|
||||||
|
int64Dec# w# = construct $ \arr off0 -> case compare w 0 of
|
||||||
|
GT -> internalWordLoop arr off0 (fromIntegral w)
|
||||||
|
EQ -> do
|
||||||
|
writeByteArray arr off0 (c2w '0')
|
||||||
|
pure (off0 + 1)
|
||||||
|
LT -> do
|
||||||
|
writeByteArray arr off0 (c2w '-')
|
||||||
|
internalWordLoop arr (off0 + 1) (fromIntegral (negate w))
|
||||||
|
where
|
||||||
|
w = I64# w#
|
||||||
|
|
||||||
-- Convert a number between 0 and 16 to the ASCII
|
-- Convert a number between 0 and 16 to the ASCII
|
||||||
-- representation of its hexadecimal character.
|
-- representation of its hexadecimal character.
|
||||||
-- The use of fromIntegral causes us to incur an
|
-- The use of fromIntegral causes us to incur an
|
||||||
|
@ -179,6 +219,38 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
|
||||||
where
|
where
|
||||||
w = W# w#
|
w = W# w#
|
||||||
|
|
||||||
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word64BE :: Word64 -> Builder 8
|
||||||
|
word64BE w = construct $ \arr off -> do
|
||||||
|
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
|
||||||
|
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
|
||||||
|
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
|
||||||
|
writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32))
|
||||||
|
writeByteArray arr (off + 4) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24))
|
||||||
|
writeByteArray arr (off + 5) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16))
|
||||||
|
writeByteArray arr (off + 6) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8))
|
||||||
|
writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w)
|
||||||
|
pure (off + 8)
|
||||||
|
|
||||||
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word32BE :: Word32 -> Builder 4
|
||||||
|
word32BE w = construct $ \arr off -> do
|
||||||
|
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
|
||||||
|
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
|
||||||
|
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
|
||||||
|
writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w)
|
||||||
|
pure (off + 4)
|
||||||
|
|
||||||
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
||||||
|
-- word in a big-endian fashion.
|
||||||
|
word16BE :: Word16 -> Builder 2
|
||||||
|
word16BE w = construct $ \arr off -> do
|
||||||
|
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
|
||||||
|
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
|
||||||
|
pure (off + 2)
|
||||||
|
|
||||||
-- Reverse the bytes in the designated slice. This takes
|
-- Reverse the bytes in the designated slice. This takes
|
||||||
-- an inclusive start offset and an inclusive end offset.
|
-- an inclusive start offset and an inclusive end offset.
|
||||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||||
|
|
|
@ -12,12 +12,14 @@ import Debug.Trace
|
||||||
import Test.Tasty (defaultMain,testGroup,TestTree)
|
import Test.Tasty (defaultMain,testGroup,TestTree)
|
||||||
import Test.QuickCheck ((===))
|
import Test.QuickCheck ((===))
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
import qualified Data.ByteString.Builder as BB
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.List as L
|
import qualified Data.List as L
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import qualified Test.Tasty.QuickCheck as TQC
|
import qualified Test.Tasty.QuickCheck as TQC
|
||||||
import qualified Test.QuickCheck as QC
|
import qualified Test.QuickCheck as QC
|
||||||
import qualified GHC.Exts as Exts
|
import qualified GHC.Exts as Exts
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as LB
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = defaultMain tests
|
main = defaultMain tests
|
||||||
|
@ -30,6 +32,10 @@ tests = testGroup "Tests"
|
||||||
run 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
run 1 (word64Dec x <> word64Dec y <> word64Dec z)
|
||||||
===
|
===
|
||||||
pack (show x ++ show y ++ show z)
|
pack (show x ++ show y ++ show z)
|
||||||
|
, TQC.testProperty "word64BE-x3" $ \x y z ->
|
||||||
|
run 1 (word64BE x <> word64BE y <> word64BE z)
|
||||||
|
===
|
||||||
|
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
|
||||||
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
, TQC.testProperty "word64PaddedUpperHex" $ \w ->
|
||||||
run 1 (word64PaddedUpperHex w)
|
run 1 (word64PaddedUpperHex w)
|
||||||
===
|
===
|
||||||
|
|
Loading…
Reference in a new issue