add functions for big-endian encodings, add functions for decimal representation of signed 64-bit integers

This commit is contained in:
Andrew Martin 2019-07-03 16:28:36 -04:00
parent 5f9ff64a51
commit f7c42ba6be
4 changed files with 109 additions and 2 deletions

View file

@ -60,6 +60,7 @@ test-suite test
build-depends: build-depends:
, base >=4.12.0.0 && <5 , base >=4.12.0.0 && <5
, byteslice , byteslice
, bytestring
, small-bytearray-builder , small-bytearray-builder
, QuickCheck >=2.13.1 && <2.14 , QuickCheck >=2.13.1 && <2.14
, tasty-quickcheck >=0.10.1 && <0.11 , tasty-quickcheck >=0.10.1 && <0.11

View file

@ -21,10 +21,16 @@ module Data.ByteArray.Builder.Small
-- * Materialized Byte Sequences -- * Materialized Byte Sequences
, bytes , bytes
, bytearray , bytearray
-- * Numbers -- * Encode Integral Types
-- ** Human-Readable
, word64Dec , word64Dec
, int64Dec
, word64PaddedUpperHex , word64PaddedUpperHex
, word32PaddedUpperHex , word32PaddedUpperHex
-- ** Machine-Readable
, word64BE
, word32BE
, word16BE
) where ) where
import Control.Monad.Primitive import Control.Monad.Primitive
@ -32,6 +38,7 @@ import Control.Monad.ST
import Control.Monad.ST.Run (runByteArrayST) import Control.Monad.ST.Run (runByteArrayST)
import Data.Bytes.Types import Data.Bytes.Types
import Data.Primitive import Data.Primitive
import Data.Int (Int64)
import GHC.Exts import GHC.Exts
import GHC.ST import GHC.ST
import GHC.Word import GHC.Word
@ -182,6 +189,13 @@ bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len
word64Dec :: Word64 -> Builder word64Dec :: Word64 -> Builder
word64Dec w = fromUnsafe (Unsafe.word64Dec w) word64Dec w = fromUnsafe (Unsafe.word64Dec w)
-- | Encodes a signed 64-bit integer as decimal.
-- This encoding never starts with a zero unless the argument was zero.
-- Negative numbers are preceded by a minus sign. Positive numbers
-- are not preceded by anything.
int64Dec :: Int64 -> Builder
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding -- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
-- the encoding to 16 digits. This uses uppercase for the alphabetical -- the encoding to 16 digits. This uses uppercase for the alphabetical
-- digits. For example, this encodes the number 1022 as @00000000000003FE@. -- digits. For example, this encodes the number 1022 as @00000000000003FE@.
@ -203,3 +217,17 @@ shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
shrinkMutableByteArray (MutableByteArray arr) (I# sz) = shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
primitive_ (shrinkMutableByteArray# arr sz) primitive_ (shrinkMutableByteArray# arr sz)
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
-- word in a big-endian fashion.
word64BE :: Word64 -> Builder
word64BE w = fromUnsafe (Unsafe.word64BE w)
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
-- word in a big-endian fashion.
word32BE :: Word32 -> Builder
word32BE w = fromUnsafe (Unsafe.word32BE w)
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
-- word in a big-endian fashion.
word16BE :: Word16 -> Builder
word16BE w = fromUnsafe (Unsafe.word16BE w)

View file

@ -8,6 +8,7 @@
{-# language LambdaCase #-} {-# language LambdaCase #-}
{-# language TypeOperators #-} {-# language TypeOperators #-}
{-# language DataKinds #-} {-# language DataKinds #-}
{-# language TypeApplications #-}
-- | The functions in this module do not check to -- | The functions in this module do not check to
-- see if there is enough space in the buffer. -- see if there is enough space in the buffer.
@ -22,20 +23,26 @@ module Data.ByteArray.Builder.Small.Unsafe
-- * Combine -- * Combine
, append , append
-- * Encode Integral Types -- * Encode Integral Types
-- ** Human-Readable
, word64Dec , word64Dec
, int64Dec
, word64PaddedUpperHex , word64PaddedUpperHex
, word32PaddedUpperHex , word32PaddedUpperHex
-- ** Machine-Readable
, word64BE
, word32BE
, word16BE
) where ) where
import Control.Monad.Primitive import Control.Monad.Primitive
import Control.Monad.ST import Control.Monad.ST
import Data.Bits import Data.Bits
import Data.Bytes.Types
import Data.Char (ord) import Data.Char (ord)
import Data.Primitive import Data.Primitive
import GHC.Exts import GHC.Exts
import GHC.ST import GHC.ST
import GHC.Word import GHC.Word
import GHC.Int
import Data.Kind import Data.Kind
import GHC.TypeLits (KnownNat,Nat,type (+),natVal') import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
@ -92,6 +99,13 @@ append (Builder f) (Builder g) =
word64Dec :: Word64 -> Builder 19 word64Dec :: Word64 -> Builder 19
word64Dec (W64# w) = word64Dec# w word64Dec (W64# w) = word64Dec# w
-- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal.
-- This encoding never starts with a zero unless the argument was zero.
-- Negative numbers are preceded by a minus sign. Positive numbers
-- are not preceded by anything.
int64Dec :: Int64 -> Builder 20
int64Dec (I64# w) = int64Dec# w
-- | Requires up to 19 bytes. -- | Requires up to 19 bytes.
word64Dec# :: Word# -> Builder 19 word64Dec# :: Word# -> Builder 19
{-# noinline word64Dec# #-} {-# noinline word64Dec# #-}
@ -112,6 +126,32 @@ word64Dec# w# = construct $ \arr off0 -> if w /= 0
where where
w = W64# w# w = W64# w#
internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
{-# inline internalWordLoop #-}
internalWordLoop arr off0 x0 = go off0 x0 where
go !off !(x :: Word) = if x > 0
then do
let (y,z) = quotRem x 10
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
go (off + 1) y
else do
reverseBytes arr off0 (off - 1)
pure off
-- | Requires up to 19 bytes.
int64Dec# :: Int# -> Builder 20
{-# noinline int64Dec# #-}
int64Dec# w# = construct $ \arr off0 -> case compare w 0 of
GT -> internalWordLoop arr off0 (fromIntegral w)
EQ -> do
writeByteArray arr off0 (c2w '0')
pure (off0 + 1)
LT -> do
writeByteArray arr off0 (c2w '-')
internalWordLoop arr (off0 + 1) (fromIntegral (negate w))
where
w = I64# w#
-- Convert a number between 0 and 16 to the ASCII -- Convert a number between 0 and 16 to the ASCII
-- representation of its hexadecimal character. -- representation of its hexadecimal character.
-- The use of fromIntegral causes us to incur an -- The use of fromIntegral causes us to incur an
@ -179,6 +219,38 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
where where
w = W# w# w = W# w#
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
-- word in a big-endian fashion.
word64BE :: Word64 -> Builder 8
word64BE w = construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32))
writeByteArray arr (off + 4) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24))
writeByteArray arr (off + 5) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16))
writeByteArray arr (off + 6) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8))
writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w)
pure (off + 8)
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
-- word in a big-endian fashion.
word32BE :: Word32 -> Builder 4
word32BE w = construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w)
pure (off + 4)
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
-- word in a big-endian fashion.
word16BE :: Word16 -> Builder 2
word16BE w = construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
pure (off + 2)
-- Reverse the bytes in the designated slice. This takes -- Reverse the bytes in the designated slice. This takes
-- an inclusive start offset and an inclusive end offset. -- an inclusive start offset and an inclusive end offset.
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()

View file

@ -12,12 +12,14 @@ import Debug.Trace
import Test.Tasty (defaultMain,testGroup,TestTree) import Test.Tasty (defaultMain,testGroup,TestTree)
import Test.QuickCheck ((===)) import Test.QuickCheck ((===))
import Text.Printf (printf) import Text.Printf (printf)
import qualified Data.ByteString.Builder as BB
import qualified Data.Primitive as PM import qualified Data.Primitive as PM
import qualified Data.List as L import qualified Data.List as L
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Test.Tasty.QuickCheck as TQC import qualified Test.Tasty.QuickCheck as TQC
import qualified Test.QuickCheck as QC import qualified Test.QuickCheck as QC
import qualified GHC.Exts as Exts import qualified GHC.Exts as Exts
import qualified Data.ByteString.Lazy.Char8 as LB
main :: IO () main :: IO ()
main = defaultMain tests main = defaultMain tests
@ -30,6 +32,10 @@ tests = testGroup "Tests"
run 1 (word64Dec x <> word64Dec y <> word64Dec z) run 1 (word64Dec x <> word64Dec y <> word64Dec z)
=== ===
pack (show x ++ show y ++ show z) pack (show x ++ show y ++ show z)
, TQC.testProperty "word64BE-x3" $ \x y z ->
run 1 (word64BE x <> word64BE y <> word64BE z)
===
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
, TQC.testProperty "word64PaddedUpperHex" $ \w -> , TQC.testProperty "word64PaddedUpperHex" $ \w ->
run 1 (word64PaddedUpperHex w) run 1 (word64PaddedUpperHex w)
=== ===