Add VLQ builders for Word32 and Word64

This commit is contained in:
Andrew Martin 2023-04-17 12:01:30 -04:00
parent 41855c4911
commit 2a6f03725f
5 changed files with 93 additions and 1 deletions

View file

@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
`small-bytearray-builder` is now just a compatibility shim `small-bytearray-builder` is now just a compatibility shim
to ease the migration process. to ease the migration process.
## 0.3.13.0 -- 2023-??-??
* Add VLQ builders for Word32 and Word64.
## 0.3.12.0 -- 2022-12-01 ## 0.3.12.0 -- 2022-12-01
* Support GHC 9.4. * Support GHC 9.4.

View file

@ -1,6 +1,6 @@
cabal-version: 2.2 cabal-version: 2.2
name: bytebuild name: bytebuild
version: 0.3.12.0 version: 0.3.13.0
synopsis: Build byte arrays synopsis: Build byte arrays
description: description:
This is similar to the builder facilities provided by This is similar to the builder facilities provided by

View file

@ -105,6 +105,10 @@ module Data.Bytes.Builder
, wordLEB128 , wordLEB128
, word32LEB128 , word32LEB128
, word64LEB128 , word64LEB128
-- **** VLQ
, wordVlq
, word32Vlq
, word64Vlq
-- *** Many -- *** Many
, word8Array , word8Array
-- **** Big Endian -- **** Big Endian
@ -1217,6 +1221,21 @@ word64LEB128 :: Word64 -> Builder
{-# inline word64LEB128 #-} {-# inline word64LEB128 #-}
word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w) word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w)
-- | Encode a machine-sized word with VLQ.
wordVlq :: Word -> Builder
{-# inline wordVlq #-}
wordVlq w = fromBounded Nat.constant (Bounded.wordVlq w)
-- | Encode a 32-bit word with VLQ.
word32Vlq :: Word32 -> Builder
{-# inline word32Vlq #-}
word32Vlq w = fromBounded Nat.constant (Bounded.word32Vlq w)
-- | Encode a 64-bit word with VLQ.
word64Vlq :: Word64 -> Builder
{-# inline word64Vlq #-}
word64Vlq w = fromBounded Nat.constant (Bounded.word64Vlq w)
-- | Encode a signed arbitrary-precision integer as decimal. -- | Encode a signed arbitrary-precision integer as decimal.
-- This encoding never starts with a zero unless the argument was zero. -- This encoding never starts with a zero unless the argument was zero.
-- Negative numbers are preceded by a minus sign. Positive numbers -- Negative numbers are preceded by a minus sign. Positive numbers

View file

@ -5,6 +5,7 @@
{-# language KindSignatures #-} {-# language KindSignatures #-}
{-# language LambdaCase #-} {-# language LambdaCase #-}
{-# language MagicHash #-} {-# language MagicHash #-}
{-# language NumericUnderscores #-}
{-# language RankNTypes #-} {-# language RankNTypes #-}
{-# language ScopedTypeVariables #-} {-# language ScopedTypeVariables #-}
{-# language TypeApplications #-} {-# language TypeApplications #-}
@ -97,9 +98,19 @@ module Data.Bytes.Builder.Bounded
, int32LE , int32LE
, int16LE , int16LE
-- **** LEB128 -- **** LEB128
-- | LEB128 encodes an integer in 7-bit units, least significant bits first,
-- with the high bit of each output byte set to 1 in all bytes except for
-- the final byte.
, wordLEB128 , wordLEB128
, word32LEB128 , word32LEB128
, word64LEB128 , word64LEB128
-- **** VLQ
-- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit
-- units, most significant bits first, with the high bit of each output byte
-- set to 1 in all bytes except for the final byte.
, wordVlq
, word32Vlq
, word64Vlq
-- * Encode Floating-Point Types -- * Encode Floating-Point Types
, doubleDec , doubleDec
) where ) where
@ -886,6 +897,27 @@ ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.
primitive_ (writeCharArray# arr (off +# 7# ) c7) primitive_ (writeCharArray# arr (off +# 7# ) c7)
pure (I# (off +# 8# )) pure (I# (off +# 8# ))
-- | Encode a machine-sized word with VLQ (also known as VByte, Varint, VInt).
wordVlq :: Word -> Builder 10
{-# inline wordVlq #-}
wordVlq (W# w) = vlqCommon (W# w)
-- | Encode a 32-bit word with VLQ (also known as VByte, Varint, VInt).
word32Vlq :: Word32 -> Builder 5
{-# inline word32Vlq #-}
word32Vlq (W32# w) = vlqCommon (W# (C.word32ToWord# w))
-- | Encode a 64-bit word with VLQ (also known as VByte, Varint, VInt).
word64Vlq :: Word64 -> Builder 10
{-# inline word64Vlq #-}
word64Vlq (W64# w) = vlqCommon (W#
#if MIN_VERSION_base(4,17,0)
(word64ToWord# w)
#else
w
#endif
)
-- | Encode a machine-sized word with LEB-128. -- | Encode a machine-sized word with LEB-128.
wordLEB128 :: Word -> Builder 10 wordLEB128 :: Word -> Builder 10
{-# inline wordLEB128 #-} {-# inline wordLEB128 #-}
@ -907,6 +939,24 @@ word64LEB128 (W64# w) = lebCommon (W#
#endif #endif
) )
vlqCommon :: Word -> Builder n
vlqCommon !w = case w of
0 -> unsafeWord8 0
_ ->
let !startIx = 7 * quot (63 - countLeadingZeros w) 7
in vlqStep startIx w
vlqStep ::
Int -- start index, must be in range [0,63] and 7 must divide it evenly
-> Word
-> Builder n
vlqStep !ix !w
| ix <= 0 =
unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .&. 0b0111_1111))
| otherwise = unsafeAppend
(unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000)))
(vlqStep (ix - 7) w)
lebCommon :: Word -> Builder n lebCommon :: Word -> Builder n
lebCommon !w = case quotRem w 128 of lebCommon !w = case quotRem w 128 of
(q,r) -> case q of (q,r) -> case q of

View file

@ -242,6 +242,10 @@ tests = testGroup "Tests"
in runConcat 1 (foldMap word256BE xs) in runConcat 1 (foldMap word256BE xs)
=== ===
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
, TQC.testProperty "word64Vlq" $ \(x :: Word64) ->
runConcat 1 (word64Vlq x)
===
naiveVlq (fromIntegral x)
, TQC.testProperty "word64LEB128" $ \(x :: Word64) -> , TQC.testProperty "word64LEB128" $ \(x :: Word64) ->
runConcat 1 (word64LEB128 x) runConcat 1 (word64LEB128 x)
=== ===
@ -416,3 +420,18 @@ naiveLeb128 x =
in if q == 0 in if q == 0
then L.reverse xs' then L.reverse xs'
else go xs' q else go xs' q
naiveVlq :: Natural -> ByteArray
naiveVlq x =
Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)))
where
go !xs !n =
let (q,r) = quotRem n 128
r' = fromIntegral @Natural @Word8 r
w = case xs of
[] -> r'
_ -> Bits.setBit r' 7
xs' = w : xs
in if q == 0
then xs'
else go xs' q