Add VLQ builders for Word32 and Word64
This commit is contained in:
parent
41855c4911
commit
2a6f03725f
5 changed files with 93 additions and 1 deletions
|
@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
|
|||
`small-bytearray-builder` is now just a compatibility shim
|
||||
to ease the migration process.
|
||||
|
||||
## 0.3.13.0 -- 2023-??-??
|
||||
|
||||
* Add VLQ builders for Word32 and Word64.
|
||||
|
||||
## 0.3.12.0 -- 2022-12-01
|
||||
|
||||
* Support GHC 9.4.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
cabal-version: 2.2
|
||||
name: bytebuild
|
||||
version: 0.3.12.0
|
||||
version: 0.3.13.0
|
||||
synopsis: Build byte arrays
|
||||
description:
|
||||
This is similar to the builder facilities provided by
|
||||
|
|
|
@ -105,6 +105,10 @@ module Data.Bytes.Builder
|
|||
, wordLEB128
|
||||
, word32LEB128
|
||||
, word64LEB128
|
||||
-- **** VLQ
|
||||
, wordVlq
|
||||
, word32Vlq
|
||||
, word64Vlq
|
||||
-- *** Many
|
||||
, word8Array
|
||||
-- **** Big Endian
|
||||
|
@ -1217,6 +1221,21 @@ word64LEB128 :: Word64 -> Builder
|
|||
{-# inline word64LEB128 #-}
|
||||
word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w)
|
||||
|
||||
-- | VLQ-encode a machine-sized word. This hands the bounded builder
-- 'Bounded.wordVlq' to 'fromBounded'.
wordVlq :: Word -> Builder
{-# inline wordVlq #-}
wordVlq n = fromBounded Nat.constant $ Bounded.wordVlq n
|
||||
|
||||
-- | VLQ-encode a 32-bit word. This hands the bounded builder
-- 'Bounded.word32Vlq' to 'fromBounded'.
word32Vlq :: Word32 -> Builder
{-# inline word32Vlq #-}
word32Vlq n = fromBounded Nat.constant $ Bounded.word32Vlq n
|
||||
|
||||
-- | VLQ-encode a 64-bit word. This hands the bounded builder
-- 'Bounded.word64Vlq' to 'fromBounded'.
word64Vlq :: Word64 -> Builder
{-# inline word64Vlq #-}
word64Vlq n = fromBounded Nat.constant $ Bounded.word64Vlq n
|
||||
|
||||
-- | Encode a signed arbitrary-precision integer as decimal.
|
||||
-- This encoding never starts with a zero unless the argument was zero.
|
||||
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
{-# language KindSignatures #-}
|
||||
{-# language LambdaCase #-}
|
||||
{-# language MagicHash #-}
|
||||
{-# language NumericUnderscores #-}
|
||||
{-# language RankNTypes #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language TypeApplications #-}
|
||||
|
@ -97,9 +98,19 @@ module Data.Bytes.Builder.Bounded
|
|||
, int32LE
|
||||
, int16LE
|
||||
-- **** LEB128
|
||||
-- | LEB128 encodes an integer in 7-bit units, least significant bits first,
|
||||
-- with the high bit of each output byte set to 1 in all bytes except for
|
||||
-- the final byte.
|
||||
, wordLEB128
|
||||
, word32LEB128
|
||||
, word64LEB128
|
||||
-- **** VLQ
|
||||
-- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit
|
||||
-- units, most significant bits first, with the high bit of each output byte
|
||||
-- set to 1 in all bytes except for the final byte.
|
||||
, wordVlq
|
||||
, word32Vlq
|
||||
, word64Vlq
|
||||
-- * Encode Floating-Point Types
|
||||
, doubleDec
|
||||
) where
|
||||
|
@ -886,6 +897,27 @@ ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.
|
|||
primitive_ (writeCharArray# arr (off +# 7# ) c7)
|
||||
pure (I# (off +# 8# ))
|
||||
|
||||
-- | VLQ-encode (also known as VByte, Varint, VInt) a machine-sized word.
-- The type-level bound on the output is 10 bytes.
wordVlq :: Word -> Builder 10
{-# inline wordVlq #-}
-- The as-pattern still forces the argument to its constructor, exactly as
-- unboxing and reboxing it would.
wordVlq boxed@(W# _) = vlqCommon boxed
|
||||
|
||||
-- | VLQ-encode (also known as VByte, Varint, VInt) a 32-bit word.
-- The type-level bound on the output is 5 bytes.
word32Vlq :: Word32 -> Builder 5
{-# inline word32Vlq #-}
word32Vlq (W32# narrow) = vlqCommon widened
  where
  -- Widen the 32-bit payload to a machine word before encoding.
  widened = W# (C.word32ToWord# narrow)
|
||||
|
||||
-- | VLQ-encode (also known as VByte, Varint, VInt) a 64-bit word.
-- The type-level bound on the output is 10 bytes.
word64Vlq :: Word64 -> Builder 10
{-# inline word64Vlq #-}
-- With base >= 4.17 the payload of W64# is converted with word64ToWord#;
-- with older base it is passed to W# as-is.
#if MIN_VERSION_base(4,17,0)
word64Vlq (W64# w) = vlqCommon (W# (word64ToWord# w))
#else
word64Vlq (W64# w) = vlqCommon (W# w)
#endif
|
||||
|
||||
-- | Encode a machine-sized word with LEB-128.
|
||||
wordLEB128 :: Word -> Builder 10
|
||||
{-# inline wordLEB128 #-}
|
||||
|
@ -907,6 +939,24 @@ word64LEB128 (W64# w) = lebCommon (W#
|
|||
#endif
|
||||
)
|
||||
|
||||
-- Shared VLQ encoder behind the sized entry points. The bound @n@ is not
-- checked here; each caller picks a bound large enough for its input width.
--
-- Zero is emitted as a single zero byte. Any other value is handed to
-- 'vlqStep' together with the shift of its most significant non-empty
-- 7-bit group.
-- NOTE(review): the constant 63 assumes a 64-bit 'Word' -- confirm for
-- 32-bit targets.
vlqCommon :: Word -> Builder n
vlqCommon !w
  | w == 0 = unsafeWord8 0
  | otherwise =
      let !highBit = 63 - countLeadingZeros w
          -- Round the highest set bit's index down to a multiple of 7.
          !firstShift = 7 * (highBit `quot` 7)
       in vlqStep firstShift w
|
||||
|
||||
-- Emit the 7-bit groups of the word, starting at the given shift and
-- stepping the shift down by 7 until it reaches zero. Every byte except
-- the last has bit 7 set; the last byte keeps only its low seven bits.
vlqStep ::
     Int -- start index, must be in range [0,63] and 7 must divide it evenly
  -> Word
  -> Builder n
vlqStep !ix !w
  | ix > 0 = unsafeAppend
      (unsafeWord8 (unsafeWordToWord8 (shifted .|. 0x80)))
      (vlqStep (ix - 7) w)
  | otherwise = unsafeWord8 (unsafeWordToWord8 (shifted .&. 0x7F))
  where
  -- The group for this step sits in the low bits after shifting.
  shifted = unsafeShiftR w ix
|
||||
|
||||
lebCommon :: Word -> Builder n
|
||||
lebCommon !w = case quotRem w 128 of
|
||||
(q,r) -> case q of
|
||||
|
|
19
test/Main.hs
19
test/Main.hs
|
@ -242,6 +242,10 @@ tests = testGroup "Tests"
|
|||
in runConcat 1 (foldMap word256BE xs)
|
||||
===
|
||||
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
|
||||
, TQC.testProperty "word64Vlq" $ \(x :: Word64) ->
|
||||
runConcat 1 (word64Vlq x)
|
||||
===
|
||||
naiveVlq (fromIntegral x)
|
||||
, TQC.testProperty "word64LEB128" $ \(x :: Word64) ->
|
||||
runConcat 1 (word64LEB128 x)
|
||||
===
|
||||
|
@ -416,3 +420,18 @@ naiveLeb128 x =
|
|||
in if q == 0
|
||||
then L.reverse xs'
|
||||
else go xs' q
|
||||
|
||||
-- Reference VLQ encoder used to check the builders. It peels off 7-bit
-- groups from the least significant end and prepends each to the
-- accumulator, so the result is most significant group first. Only the
-- first group produced (the final output byte) is left without bit 7 set.
--
-- The leading 0xFF is a dummy byte that is dropped again straight away.
-- NOTE(review): presumably this is to produce a slice with a nonzero
-- offset before conversion -- confirm.
naiveVlq :: Natural -> ByteArray
naiveVlq x =
  Bytes.toByteArray $ Bytes.unsafeDrop 1 $ Exts.fromList (0xFF : go [] x)
  where
  go !acc !remaining
    | rest == 0 = acc'
    | otherwise = go acc' rest
    where
    (rest, low7) = quotRem remaining 128
    septet = fromIntegral @Natural @Word8 low7
    tagged = case acc of
      [] -> septet
      _ : _ -> Bits.setBit septet 7
    acc' = tagged : acc
|
||||
|
|
Loading…
Reference in a new issue