Add wordLEB128, word64LEB128, integerDec, naturalDec, and word48PaddedLowerHex
This commit is contained in:
parent
2ce46c4c4a
commit
d39c76a65a
5 changed files with 240 additions and 8 deletions
|
@ -45,6 +45,8 @@ module Data.Bytes.Builder.Bounded
|
|||
-- ** 64-bit
|
||||
, word64PaddedLowerHex
|
||||
, word64PaddedUpperHex
|
||||
-- ** 48-bit
|
||||
, word48PaddedLowerHex
|
||||
-- ** 32-bit
|
||||
, word32PaddedLowerHex
|
||||
, word32PaddedUpperHex
|
||||
|
@ -89,6 +91,9 @@ module Data.Bytes.Builder.Bounded
|
|||
, int64LE
|
||||
, int32LE
|
||||
, int16LE
|
||||
-- **** LEB128
|
||||
, wordLEB128
|
||||
, word64LEB128
|
||||
-- * Encode Floating-Point Types
|
||||
, doubleDec
|
||||
) where
|
||||
|
@ -161,7 +166,10 @@ infixr 9 `append`
|
|||
|
||||
-- | Concatenate two builders.
|
||||
append :: Builder m -> Builder n -> Builder (m + n)
|
||||
append (Builder f) (Builder g) =
|
||||
append = unsafeAppend
|
||||
|
||||
unsafeAppend :: Builder m -> Builder n -> Builder p
|
||||
unsafeAppend (Builder f) (Builder g) =
|
||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||
(# s1, r #) -> g arr r s1
|
||||
|
||||
|
@ -286,15 +294,20 @@ wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
|
|||
|
||||
internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||
{-# inline internalWordLoop #-}
|
||||
internalWordLoop arr off0 x0 = go off0 x0 where
|
||||
internalWordLoop arr off0 x0 = do
|
||||
off1 <- backwardsWordLoop arr off0 x0
|
||||
reverseBytes arr off0 (off1 - 1)
|
||||
pure off1
|
||||
|
||||
backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||
{-# inline backwardsWordLoop #-}
|
||||
backwardsWordLoop arr off0 x0 = go off0 x0 where
|
||||
go !off !(x :: Word) = if x > 0
|
||||
then do
|
||||
let (y,z) = quotRem x 10
|
||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||
go (off + 1) y
|
||||
else do
|
||||
reverseBytes arr off0 (off - 1)
|
||||
pure off
|
||||
else pure off
|
||||
|
||||
-- Requires up to 20 bytes. Can be less depending on what the
|
||||
-- size of the argument is known to be. Unsafe.
|
||||
|
@ -390,6 +403,14 @@ word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w
|
|||
word64PaddedLowerHex :: Word64 -> Builder 16
|
||||
word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# w
|
||||
|
||||
-- | Requires exactly 12 bytes. Discards the upper 16 bits of a
|
||||
-- 64-bit unsigned integer and then encodes the lower 48 bits as
|
||||
-- hexadecimal, zero-padding the encoding to 12 digits. This uses
|
||||
-- lowercase for the alphabetical digits. For example, this encodes the
|
||||
-- number 1022 as @0000000003fe@.
|
||||
word48PaddedLowerHex :: Word64 -> Builder 12
|
||||
word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# w
|
||||
|
||||
-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as
|
||||
-- hexadecimal, zero-padding the encoding to 8 digits. This uses
|
||||
-- uppercase for the alphabetical digits.
|
||||
|
@ -481,6 +502,27 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
|||
where
|
||||
w = W# w#
|
||||
|
||||
-- TODO: Is it actually worth unrolling this loop. I suspect that it
|
||||
-- might not be. Benchmark this.
|
||||
word48PaddedLowerHex# :: Word# -> Builder 12
|
||||
{-# noinline word48PaddedLowerHex# #-}
|
||||
word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do
|
||||
writeByteArray arr off (toHexLower (unsafeShiftR w 44))
|
||||
writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 40))
|
||||
writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 36))
|
||||
writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 32))
|
||||
writeByteArray arr (off + 4) (toHexLower (unsafeShiftR w 28))
|
||||
writeByteArray arr (off + 5) (toHexLower (unsafeShiftR w 24))
|
||||
writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 20))
|
||||
writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 16))
|
||||
writeByteArray arr (off + 8) (toHexLower (unsafeShiftR w 12))
|
||||
writeByteArray arr (off + 9) (toHexLower (unsafeShiftR w 8))
|
||||
writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 4))
|
||||
writeByteArray arr (off + 11) (toHexLower w)
|
||||
pure (off + 12)
|
||||
where
|
||||
w = W# w#
|
||||
|
||||
-- TODO: Is it actually worth unrolling this loop. I suspect that it
|
||||
-- might not be. Benchmark this.
|
||||
word64PaddedLowerHex# :: Word# -> Builder 16
|
||||
|
@ -752,6 +794,22 @@ ascii6 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) = Unsafe.construct $ \(Mu
|
|||
primitive_ (writeCharArray# arr (off +# 5# ) c5)
|
||||
pure (I# (off +# 6# ))
|
||||
|
||||
-- | Encode a machine-sized word with LEB-128.
|
||||
wordLEB128 :: Word -> Builder 10
|
||||
wordLEB128 (W# w) = lebCommon (W# w)
|
||||
|
||||
-- | Encode a 64-bit word with LEB-128.
|
||||
word64LEB128 :: Word64 -> Builder 10
|
||||
word64LEB128 (W64# w) = lebCommon (W# w)
|
||||
|
||||
lebCommon :: Word -> Builder n
|
||||
lebCommon !w = case quotRem w 128 of
|
||||
(q,r) -> case q of
|
||||
0 -> unsafeWord8 (unsafeWordToWord8 r)
|
||||
_ -> unsafeAppend
|
||||
(unsafeWord8 (unsafeWordToWord8 (r .|. 0x80)))
|
||||
(lebCommon q)
|
||||
|
||||
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
||||
char :: Char -> Builder 4
|
||||
char c
|
||||
|
@ -913,6 +971,11 @@ word8 w = Unsafe.construct $ \arr off -> do
|
|||
writeByteArray arr off w
|
||||
pure (off + 1)
|
||||
|
||||
unsafeWord8 :: Word8 -> Builder n
|
||||
unsafeWord8 w = Unsafe.construct $ \arr off -> do
|
||||
writeByteArray arr off w
|
||||
pure (off + 1)
|
||||
|
||||
-- Reverse the bytes in the designated slice. This takes
|
||||
-- an inclusive start offset and an inclusive end offset.
|
||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue