Add VLQ builders for Word32 and Word64
This commit is contained in:
parent
41855c4911
commit
2a6f03725f
5 changed files with 93 additions and 1 deletions
|
@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
|
||||||
`small-bytearray-builder` is now just a compatibility shim
|
`small-bytearray-builder` is now just a compatibility shim
|
||||||
to ease the migration process.
|
to ease the migration process.
|
||||||
|
|
||||||
|
## 0.3.13.0 -- 2023-??-??
|
||||||
|
|
||||||
|
* Add VLQ builders for Word32 and Word64.
|
||||||
|
|
||||||
## 0.3.12.0 -- 2022-12-01
|
## 0.3.12.0 -- 2022-12-01
|
||||||
|
|
||||||
* Support GHC 9.4.
|
* Support GHC 9.4.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
cabal-version: 2.2
|
cabal-version: 2.2
|
||||||
name: bytebuild
|
name: bytebuild
|
||||||
version: 0.3.12.0
|
version: 0.3.13.0
|
||||||
synopsis: Build byte arrays
|
synopsis: Build byte arrays
|
||||||
description:
|
description:
|
||||||
This is similar to the builder facilities provided by
|
This is similar to the builder facilities provided by
|
||||||
|
|
|
@ -105,6 +105,10 @@ module Data.Bytes.Builder
|
||||||
, wordLEB128
|
, wordLEB128
|
||||||
, word32LEB128
|
, word32LEB128
|
||||||
, word64LEB128
|
, word64LEB128
|
||||||
|
-- **** VLQ
|
||||||
|
, wordVlq
|
||||||
|
, word32Vlq
|
||||||
|
, word64Vlq
|
||||||
-- *** Many
|
-- *** Many
|
||||||
, word8Array
|
, word8Array
|
||||||
-- **** Big Endian
|
-- **** Big Endian
|
||||||
|
@ -1217,6 +1221,21 @@ word64LEB128 :: Word64 -> Builder
|
||||||
{-# inline word64LEB128 #-}
|
{-# inline word64LEB128 #-}
|
||||||
word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w)
|
word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w)
|
||||||
|
|
||||||
|
-- | Encode a machine-sized word with VLQ.
|
||||||
|
wordVlq :: Word -> Builder
|
||||||
|
{-# inline wordVlq #-}
|
||||||
|
wordVlq w = fromBounded Nat.constant (Bounded.wordVlq w)
|
||||||
|
|
||||||
|
-- | Encode a 32-bit word with VLQ.
|
||||||
|
word32Vlq :: Word32 -> Builder
|
||||||
|
{-# inline word32Vlq #-}
|
||||||
|
word32Vlq w = fromBounded Nat.constant (Bounded.word32Vlq w)
|
||||||
|
|
||||||
|
-- | Encode a 64-bit word with VLQ.
|
||||||
|
word64Vlq :: Word64 -> Builder
|
||||||
|
{-# inline word64Vlq #-}
|
||||||
|
word64Vlq w = fromBounded Nat.constant (Bounded.word64Vlq w)
|
||||||
|
|
||||||
-- | Encode a signed arbitrary-precision integer as decimal.
|
-- | Encode a signed arbitrary-precision integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the argument was zero.
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
-- Negative numbers are preceded by a minus sign. Positive numbers
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
{-# language KindSignatures #-}
|
{-# language KindSignatures #-}
|
||||||
{-# language LambdaCase #-}
|
{-# language LambdaCase #-}
|
||||||
{-# language MagicHash #-}
|
{-# language MagicHash #-}
|
||||||
|
{-# language NumericUnderscores #-}
|
||||||
{-# language RankNTypes #-}
|
{-# language RankNTypes #-}
|
||||||
{-# language ScopedTypeVariables #-}
|
{-# language ScopedTypeVariables #-}
|
||||||
{-# language TypeApplications #-}
|
{-# language TypeApplications #-}
|
||||||
|
@ -97,9 +98,19 @@ module Data.Bytes.Builder.Bounded
|
||||||
, int32LE
|
, int32LE
|
||||||
, int16LE
|
, int16LE
|
||||||
-- **** LEB128
|
-- **** LEB128
|
||||||
|
-- | LEB128 encodes an integer in 7-bit units, least significant bits first,
|
||||||
|
-- with the high bit of each output byte set to 1 in all bytes except for
|
||||||
|
-- the final byte.
|
||||||
, wordLEB128
|
, wordLEB128
|
||||||
, word32LEB128
|
, word32LEB128
|
||||||
, word64LEB128
|
, word64LEB128
|
||||||
|
-- **** VLQ
|
||||||
|
-- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit
|
||||||
|
-- units, most significant bits first, with the high bit of each output byte
|
||||||
|
-- set to 1 in all bytes except for the final byte.
|
||||||
|
, wordVlq
|
||||||
|
, word32Vlq
|
||||||
|
, word64Vlq
|
||||||
-- * Encode Floating-Point Types
|
-- * Encode Floating-Point Types
|
||||||
, doubleDec
|
, doubleDec
|
||||||
) where
|
) where
|
||||||
|
@ -886,6 +897,27 @@ ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.
|
||||||
primitive_ (writeCharArray# arr (off +# 7# ) c7)
|
primitive_ (writeCharArray# arr (off +# 7# ) c7)
|
||||||
pure (I# (off +# 8# ))
|
pure (I# (off +# 8# ))
|
||||||
|
|
||||||
|
-- | Encode a machine-sized word with VLQ (also known as VByte, Varint, VInt).
|
||||||
|
wordVlq :: Word -> Builder 10
|
||||||
|
{-# inline wordVlq #-}
|
||||||
|
wordVlq (W# w) = vlqCommon (W# w)
|
||||||
|
|
||||||
|
-- | Encode a 32-bit word with VLQ (also known as VByte, Varint, VInt).
|
||||||
|
word32Vlq :: Word32 -> Builder 5
|
||||||
|
{-# inline word32Vlq #-}
|
||||||
|
word32Vlq (W32# w) = vlqCommon (W# (C.word32ToWord# w))
|
||||||
|
|
||||||
|
-- | Encode a 64-bit word with VLQ (also known as VByte, Varint, VInt).
|
||||||
|
word64Vlq :: Word64 -> Builder 10
|
||||||
|
{-# inline word64Vlq #-}
|
||||||
|
word64Vlq (W64# w) = vlqCommon (W#
|
||||||
|
#if MIN_VERSION_base(4,17,0)
|
||||||
|
(word64ToWord# w)
|
||||||
|
#else
|
||||||
|
w
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
|
||||||
-- | Encode a machine-sized word with LEB-128.
|
-- | Encode a machine-sized word with LEB-128.
|
||||||
wordLEB128 :: Word -> Builder 10
|
wordLEB128 :: Word -> Builder 10
|
||||||
{-# inline wordLEB128 #-}
|
{-# inline wordLEB128 #-}
|
||||||
|
@ -907,6 +939,24 @@ word64LEB128 (W64# w) = lebCommon (W#
|
||||||
#endif
|
#endif
|
||||||
)
|
)
|
||||||
|
|
||||||
|
vlqCommon :: Word -> Builder n
|
||||||
|
vlqCommon !w = case w of
|
||||||
|
0 -> unsafeWord8 0
|
||||||
|
_ ->
|
||||||
|
let !startIx = 7 * quot (63 - countLeadingZeros w) 7
|
||||||
|
in vlqStep startIx w
|
||||||
|
|
||||||
|
vlqStep ::
|
||||||
|
Int -- start index, must be in range [0,63] and 7 must divide it evenly
|
||||||
|
-> Word
|
||||||
|
-> Builder n
|
||||||
|
vlqStep !ix !w
|
||||||
|
| ix <= 0 =
|
||||||
|
unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .&. 0b0111_1111))
|
||||||
|
| otherwise = unsafeAppend
|
||||||
|
(unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000)))
|
||||||
|
(vlqStep (ix - 7) w)
|
||||||
|
|
||||||
lebCommon :: Word -> Builder n
|
lebCommon :: Word -> Builder n
|
||||||
lebCommon !w = case quotRem w 128 of
|
lebCommon !w = case quotRem w 128 of
|
||||||
(q,r) -> case q of
|
(q,r) -> case q of
|
||||||
|
|
19
test/Main.hs
19
test/Main.hs
|
@ -242,6 +242,10 @@ tests = testGroup "Tests"
|
||||||
in runConcat 1 (foldMap word256BE xs)
|
in runConcat 1 (foldMap word256BE xs)
|
||||||
===
|
===
|
||||||
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
|
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
|
||||||
|
, TQC.testProperty "word64Vlq" $ \(x :: Word64) ->
|
||||||
|
runConcat 1 (word64Vlq x)
|
||||||
|
===
|
||||||
|
naiveVlq (fromIntegral x)
|
||||||
, TQC.testProperty "word64LEB128" $ \(x :: Word64) ->
|
, TQC.testProperty "word64LEB128" $ \(x :: Word64) ->
|
||||||
runConcat 1 (word64LEB128 x)
|
runConcat 1 (word64LEB128 x)
|
||||||
===
|
===
|
||||||
|
@ -416,3 +420,18 @@ naiveLeb128 x =
|
||||||
in if q == 0
|
in if q == 0
|
||||||
then L.reverse xs'
|
then L.reverse xs'
|
||||||
else go xs' q
|
else go xs' q
|
||||||
|
|
||||||
|
naiveVlq :: Natural -> ByteArray
|
||||||
|
naiveVlq x =
|
||||||
|
Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)))
|
||||||
|
where
|
||||||
|
go !xs !n =
|
||||||
|
let (q,r) = quotRem n 128
|
||||||
|
r' = fromIntegral @Natural @Word8 r
|
||||||
|
w = case xs of
|
||||||
|
[] -> r'
|
||||||
|
_ -> Bits.setBit r' 7
|
||||||
|
xs' = w : xs
|
||||||
|
in if q == 0
|
||||||
|
then xs'
|
||||||
|
else go xs' q
|
||||||
|
|
Loading…
Reference in a new issue