implemented wider hex builders and a couple missing builders

This commit is contained in:
Eric Demko 2020-02-06 13:58:55 -05:00 committed by Andrew Martin
parent 93d3429fe7
commit 6657cc0383
3 changed files with 124 additions and 4 deletions

View file

@ -1,5 +1,10 @@
# Revision history for small-bytearray-builder # Revision history for small-bytearray-builder
## 0.3.3.0 -- 2020-??-??
* Add `word64PaddedLowerHex` and `word32PaddedLowerHex`
* Add `word{128,256}Padded{Lower,Upper}Hex`
## 0.3.2.0 -- 2020-01-20 ## 0.3.2.0 -- 2020-01-20
* Add `putMany`, which allows pasting into the same mutable byte * Add `putMany`, which allows pasting into the same mutable byte

View file

@ -37,9 +37,16 @@ module Data.ByteArray.Builder.Bounded
, int8Dec , int8Dec
, intDec , intDec
-- * Unsigned Words -- * Unsigned Words
-- ** Wide Words
, word128PaddedLowerHex
, word128PaddedUpperHex
, word256PaddedLowerHex
, word256PaddedUpperHex
-- ** 64-bit -- ** 64-bit
, word64PaddedLowerHex
, word64PaddedUpperHex , word64PaddedUpperHex
-- ** 32-bit -- ** 32-bit
, word32PaddedLowerHex
, word32PaddedUpperHex , word32PaddedUpperHex
-- ** 16-bit -- ** 16-bit
, word16PaddedLowerHex , word16PaddedLowerHex
@ -87,7 +94,7 @@ import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
import Data.Char (ord) import Data.Char (ord)
import Data.Primitive import Data.Primitive
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import Data.WideWord (Word128(Word128)) import Data.WideWord (Word128(Word128),Word256(Word256))
import GHC.Exts import GHC.Exts
import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#)) import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#))
import GHC.ST (ST(ST)) import GHC.ST (ST(ST))
@ -323,6 +330,44 @@ toHexLower w' = fromIntegral
loSolved = w + 48 loSolved = w + 48
hiSolved = w + 87 hiSolved = w + 87
-- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 64 digits. This uses
-- lowercase for the alphabetical digits.
word256PaddedLowerHex :: Word256 -> Builder 64
word256PaddedLowerHex (Word256 w192 w128 w64 w0) =
word64PaddedLowerHex w192
`append` word64PaddedLowerHex w128
`append` word64PaddedLowerHex w64
`append` word64PaddedLowerHex w0
-- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 64 digits. This uses
-- uppercase for the alphabetical digits.
word256PaddedUpperHex :: Word256 -> Builder 64
word256PaddedUpperHex (Word256 w192 w128 w64 w0) =
word64PaddedUpperHex w192
`append` word64PaddedUpperHex w128
`append` word64PaddedUpperHex w64
`append` word64PaddedUpperHex w0
-- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 32 digits. This uses
-- lowercase for the alphabetical digits.
word128PaddedLowerHex :: Word128 -> Builder 32
word128PaddedLowerHex (Word128 w64 w0) =
word64PaddedLowerHex w64
`append` word64PaddedLowerHex w0
-- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 32 digits. This uses
-- uppercase for the alphabetical digits.
word128PaddedUpperHex :: Word128 -> Builder 32
word128PaddedUpperHex (Word128 w64 w0) =
word64PaddedUpperHex w64
`append` word64PaddedUpperHex w0
-- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as -- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 16 digits. This uses -- hexadecimal, zero-padding the encoding to 16 digits. This uses
-- uppercase for the alphabetical digits. For example, this encodes the -- uppercase for the alphabetical digits. For example, this encodes the
@ -330,12 +375,25 @@ toHexLower w' = fromIntegral
word64PaddedUpperHex :: Word64 -> Builder 16 word64PaddedUpperHex :: Word64 -> Builder 16
word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w
-- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 16 digits. This uses
-- lowercase for the alphabetical digits. For example, this encodes the
-- number 1022 as @00000000000003fe@.
word64PaddedLowerHex :: Word64 -> Builder 16
word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# w
-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as -- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 8 digits. This uses -- hexadecimal, zero-padding the encoding to 8 digits. This uses
-- uppercase for the alphabetical digits. -- uppercase for the alphabetical digits.
word32PaddedUpperHex :: Word32 -> Builder 8 word32PaddedUpperHex :: Word32 -> Builder 8
word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# w word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# w
-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 8 digits. This uses
-- lowercase for the alphabetical digits.
word32PaddedLowerHex :: Word32 -> Builder 8
word32PaddedLowerHex (W32# w) = word32PaddedLowerHex# w
-- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as -- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as
-- hexadecimal, zero-padding the encoding to 4 digits. This uses -- hexadecimal, zero-padding the encoding to 4 digits. This uses
-- uppercase for the alphabetical digits. -- uppercase for the alphabetical digits.
@ -415,6 +473,31 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
where where
w = W# w# w = W# w#
-- TODO: Is it actually worth unrolling this loop. I suspect that it
-- might not be. Benchmark this.
word64PaddedLowerHex# :: Word# -> Builder 16
{-# noinline word64PaddedLowerHex# #-}
word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexLower (unsafeShiftR w 60))
writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 56))
writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 52))
writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 48))
writeByteArray arr (off + 4) (toHexLower (unsafeShiftR w 44))
writeByteArray arr (off + 5) (toHexLower (unsafeShiftR w 40))
writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 36))
writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 32))
writeByteArray arr (off + 8) (toHexLower (unsafeShiftR w 28))
writeByteArray arr (off + 9) (toHexLower (unsafeShiftR w 24))
writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 20))
writeByteArray arr (off + 11) (toHexLower (unsafeShiftR w 16))
writeByteArray arr (off + 12) (toHexLower (unsafeShiftR w 12))
writeByteArray arr (off + 13) (toHexLower (unsafeShiftR w 8))
writeByteArray arr (off + 14) (toHexLower (unsafeShiftR w 4))
writeByteArray arr (off + 15) (toHexLower (unsafeShiftR w 0))
pure (off + 16)
where
w = W# w#
word32PaddedUpperHex# :: Word# -> Builder 8 word32PaddedUpperHex# :: Word# -> Builder 8
{-# noinline word32PaddedUpperHex# #-} {-# noinline word32PaddedUpperHex# #-}
word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
@ -430,6 +513,21 @@ word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
where where
w = W# w# w = W# w#
word32PaddedLowerHex# :: Word# -> Builder 8
{-# noinline word32PaddedLowerHex# #-}
word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexLower (unsafeShiftR w 28))
writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 24))
writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 20))
writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 16))
writeByteArray arr (off + 4) (toHexLower (unsafeShiftR w 12))
writeByteArray arr (off + 5) (toHexLower (unsafeShiftR w 8))
writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 4))
writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 0))
pure (off + 8)
where
w = W# w#
-- Not sure if it is beneficial to inline this. We just let -- Not sure if it is beneficial to inline this. We just let
-- GHC make the decision. Open an issue on github if this is -- GHC make the decision. Open an issue on github if this is
-- a problem. -- a problem.

View file

@ -14,7 +14,7 @@ import Data.Word
import Data.Char (ord,chr) import Data.Char (ord,chr)
import Data.IORef (IORef,newIORef,readIORef,writeIORef) import Data.IORef (IORef,newIORef,readIORef,writeIORef)
import Data.Primitive (ByteArray) import Data.Primitive (ByteArray)
import Data.WideWord (Word128(Word128)) import Data.WideWord (Word128(Word128),Word256(Word256))
import Test.Tasty (defaultMain,testGroup,TestTree) import Test.Tasty (defaultMain,testGroup,TestTree)
import Test.QuickCheck ((===),Arbitrary) import Test.QuickCheck ((===),Arbitrary)
import Text.Printf (printf) import Text.Printf (printf)
@ -57,6 +57,14 @@ tests = testGroup "Tests"
runConcat 1 (word64BE x <> word64BE y <> word64BE z) runConcat 1 (word64BE x <> word64BE y <> word64BE z)
=== ===
pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z)))
, TQC.testProperty "word256PaddedLowerHex" $ \w ->
Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w)
===
pack (showWord256PaddedLowerHex w)
, TQC.testProperty "word128PaddedUpperHex" $ \w ->
Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w)
===
pack (showWord128PaddedUpperHex w)
, TQC.testProperty "word64PaddedUpperHex" $ \w -> , TQC.testProperty "word64PaddedUpperHex" $ \w ->
runConcat 1 (word64PaddedUpperHex w) runConcat 1 (word64PaddedUpperHex w)
=== ===
@ -256,11 +264,17 @@ pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
packUtf8 :: String -> ByteArray packUtf8 :: String -> ByteArray
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
showWord256PaddedLowerHex :: Word256 -> String
showWord256PaddedLowerHex (Word256 hi mhi mlo lo) = printf "%016x%016x%016x%016x" hi mhi mlo lo
showWord128PaddedUpperHex :: Word128 -> String
showWord128PaddedUpperHex (Word128 hi lo) = printf "%016X%016X" hi lo
showWord64PaddedUpperHex :: Word64 -> String showWord64PaddedUpperHex :: Word64 -> String
showWord64PaddedUpperHex = printf "%016X" showWord64PaddedUpperHex = printf "%016X"
showWord16PaddedLowerHex :: Word16 -> String showWord16PaddedLowerHex :: Word16 -> String
showWord16PaddedLowerHex = printf "%04x" showWord16PaddedLowerHex = printf "%04x"
runConcat :: Int -> Builder -> ByteArray runConcat :: Int -> Builder -> ByteArray
runConcat n = Chunks.concatU . run n runConcat n = Chunks.concatU . run n
@ -271,6 +285,9 @@ c2w = fromIntegral . ord
instance Arbitrary Word128 where instance Arbitrary Word128 where
arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary
instance Arbitrary Word256 where
arbitrary = Word256 <$> TQC.arbitrary <*> TQC.arbitrary <*> TQC.arbitrary <*> TQC.arbitrary
zeroPadL :: Int -> String -> String zeroPadL :: Int -> String -> String
zeroPadL n s zeroPadL n s
| length s < n = replicate (n - length s) '0' ++ s | length s < n = replicate (n - length s) '0' ++ s