Add wordLEB128, word64LEB128, integerDec, naturalDec, and word48PaddedLowerHex
This commit is contained in:
parent
2ce46c4c4a
commit
d39c76a65a
5 changed files with 240 additions and 8 deletions
|
@ -5,6 +5,12 @@ Note: Prior to version 0.3.4.0, this library was named
|
||||||
`small-bytearray-builder` is now just a compatibility shim
|
`small-bytearray-builder` is now just a compatibility shim
|
||||||
to ease the migration process.
|
to ease the migration process.
|
||||||
|
|
||||||
|
## 0.3.5.0 -- 2020-??-??
|
||||||
|
|
||||||
|
* Add `wordLEB128` and `word64LEB128`.
|
||||||
|
* Add `integerDec` and `naturalDec`.
|
||||||
|
* Add `word48PaddedLowerHex`.
|
||||||
|
|
||||||
## 0.3.4.0 -- 2020-02-27
|
## 0.3.4.0 -- 2020-02-27
|
||||||
|
|
||||||
* Rename the library from `small-bytearray-builder` to `bytebuild`, and
|
* Rename the library from `small-bytearray-builder` to `bytebuild`, and
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
cabal-version: 2.2
|
cabal-version: 2.2
|
||||||
name: bytebuild
|
name: bytebuild
|
||||||
version: 0.3.4.0
|
version: 0.3.5.0
|
||||||
synopsis: Serialize to a small byte arrays
|
synopsis: Serialize to a small byte arrays
|
||||||
description:
|
description:
|
||||||
This is similar to the builder facilities provided by
|
This is similar to the builder facilities provided by
|
||||||
|
@ -46,6 +46,7 @@ library
|
||||||
, base >=4.12.0.0 && <5
|
, base >=4.12.0.0 && <5
|
||||||
, byteslice >=0.2 && <0.3
|
, byteslice >=0.2 && <0.3
|
||||||
, bytestring >=0.10.8.2 && <0.11
|
, bytestring >=0.10.8.2 && <0.11
|
||||||
|
, integer-logarithms >=1.0.3 && <1.1
|
||||||
, natural-arithmetic >=0.1 && <0.2
|
, natural-arithmetic >=0.1 && <0.2
|
||||||
, primitive-offset >=0.2 && <0.3
|
, primitive-offset >=0.2 && <0.3
|
||||||
, primitive-unlifted >=0.1.2 && <0.2
|
, primitive-unlifted >=0.1.2 && <0.2
|
||||||
|
@ -79,6 +80,7 @@ test-suite test
|
||||||
, primitive
|
, primitive
|
||||||
, primitive-unlifted >=0.1.2
|
, primitive-unlifted >=0.1.2
|
||||||
, quickcheck-classes >=0.6.4
|
, quickcheck-classes >=0.6.4
|
||||||
|
, quickcheck-instances >=0.3.22
|
||||||
, tasty >=1.2.3 && <1.3
|
, tasty >=1.2.3 && <1.3
|
||||||
, tasty-hunit >=0.10.0.2 && <0.11
|
, tasty-hunit >=0.10.0.2 && <0.11
|
||||||
, tasty-quickcheck >=0.10.1 && <0.11
|
, tasty-quickcheck >=0.10.1 && <0.11
|
||||||
|
|
|
@ -3,8 +3,10 @@
|
||||||
{-# language DuplicateRecordFields #-}
|
{-# language DuplicateRecordFields #-}
|
||||||
{-# language LambdaCase #-}
|
{-# language LambdaCase #-}
|
||||||
{-# language MagicHash #-}
|
{-# language MagicHash #-}
|
||||||
|
{-# language NumericUnderscores #-}
|
||||||
{-# language RankNTypes #-}
|
{-# language RankNTypes #-}
|
||||||
{-# language ScopedTypeVariables #-}
|
{-# language ScopedTypeVariables #-}
|
||||||
|
{-# language TypeApplications #-}
|
||||||
{-# language UnboxedTuples #-}
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
module Data.Bytes.Builder
|
module Data.Bytes.Builder
|
||||||
|
@ -36,11 +38,13 @@ module Data.Bytes.Builder
|
||||||
, word16Dec
|
, word16Dec
|
||||||
, word8Dec
|
, word8Dec
|
||||||
, wordDec
|
, wordDec
|
||||||
|
, naturalDec
|
||||||
, int64Dec
|
, int64Dec
|
||||||
, int32Dec
|
, int32Dec
|
||||||
, int16Dec
|
, int16Dec
|
||||||
, int8Dec
|
, int8Dec
|
||||||
, intDec
|
, intDec
|
||||||
|
, integerDec
|
||||||
-- * Unsigned Words
|
-- * Unsigned Words
|
||||||
-- ** 64-bit
|
-- ** 64-bit
|
||||||
, word64PaddedUpperHex
|
, word64PaddedUpperHex
|
||||||
|
@ -82,6 +86,10 @@ module Data.Bytes.Builder
|
||||||
, int64LE
|
, int64LE
|
||||||
, int32LE
|
, int32LE
|
||||||
, int16LE
|
, int16LE
|
||||||
|
-- **** LEB128
|
||||||
|
, intLEB128
|
||||||
|
, wordLEB128
|
||||||
|
, word64LEB128
|
||||||
-- *** Many
|
-- *** Many
|
||||||
, word8Array
|
, word8Array
|
||||||
-- **** Big Endian
|
-- **** Big Endian
|
||||||
|
@ -117,12 +125,13 @@ module Data.Bytes.Builder
|
||||||
import Control.Exception (SomeException,toException)
|
import Control.Exception (SomeException,toException)
|
||||||
import Control.Monad.ST (ST,runST)
|
import Control.Monad.ST (ST,runST)
|
||||||
import Control.Monad.IO.Class (MonadIO,liftIO)
|
import Control.Monad.IO.Class (MonadIO,liftIO)
|
||||||
|
import Data.Bits (unsafeShiftR,unsafeShiftL,xor,finiteBitSize)
|
||||||
import Data.Bytes.Builder.Unsafe (Builder(Builder))
|
import Data.Bytes.Builder.Unsafe (Builder(Builder))
|
||||||
import Data.Bytes.Builder.Unsafe (BuilderState(BuilderState),pasteIO)
|
import Data.Bytes.Builder.Unsafe (BuilderState(BuilderState),pasteIO)
|
||||||
import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
||||||
import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks)
|
import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks)
|
||||||
import Data.Bytes.Builder.Unsafe (commitsOntoChunks)
|
import Data.Bytes.Builder.Unsafe (commitsOntoChunks)
|
||||||
import Data.Bytes.Builder.Unsafe (stringUtf8,cstring)
|
import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect)
|
||||||
import Data.Bytes.Builder.Unsafe (addCommitsLength,copyReverseCommits)
|
import Data.Bytes.Builder.Unsafe (addCommitsLength,copyReverseCommits)
|
||||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||||
import Data.Bytes.Chunks (Chunks(ChunksNil))
|
import Data.Bytes.Chunks (Chunks(ChunksNil))
|
||||||
|
@ -138,8 +147,13 @@ import Foreign.C.String (CStringLen)
|
||||||
import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder)
|
import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder)
|
||||||
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,(>=#))
|
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,(>=#))
|
||||||
import GHC.Exts (RealWorld,MutableByteArray#,(+#),(-#),(<#))
|
import GHC.Exts (RealWorld,MutableByteArray#,(+#),(-#),(<#))
|
||||||
|
import GHC.Exts ((*#))
|
||||||
|
import GHC.Integer.Logarithms.Compat (integerLog2#)
|
||||||
import GHC.IO (IO(IO),stToIO)
|
import GHC.IO (IO(IO),stToIO)
|
||||||
|
import GHC.Natural (naturalFromInteger,naturalToInteger)
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
|
import GHC.Word (Word(W#),Word8(W8#))
|
||||||
|
import Numeric.Natural (Natural)
|
||||||
|
|
||||||
import qualified Arithmetic.Nat as Nat
|
import qualified Arithmetic.Nat as Nat
|
||||||
import qualified Arithmetic.Types as Arithmetic
|
import qualified Arithmetic.Types as Arithmetic
|
||||||
|
@ -909,7 +923,6 @@ word32LE w = fromBounded Nat.constant (Bounded.word32LE w)
|
||||||
word16LE :: Word16 -> Builder
|
word16LE :: Word16 -> Builder
|
||||||
word16LE w = fromBounded Nat.constant (Bounded.word16LE w)
|
word16LE w = fromBounded Nat.constant (Bounded.word16LE w)
|
||||||
|
|
||||||
|
|
||||||
-- | Requires exactly 32 bytes. Dump the octets of a 256-bit
|
-- | Requires exactly 32 bytes. Dump the octets of a 256-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word256BE :: Word256 -> Builder
|
word256BE :: Word256 -> Builder
|
||||||
|
@ -1043,3 +1056,119 @@ indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
||||||
|
|
||||||
c2w :: Char -> Word8
|
c2w :: Char -> Word8
|
||||||
c2w = fromIntegral . ord
|
c2w = fromIntegral . ord
|
||||||
|
|
||||||
|
-- In C, this is: (n << 1) ^ (n >> (BIT_WIDTH - 1))
|
||||||
|
zigZagNative :: Int -> Word
|
||||||
|
zigZagNative s = fromIntegral @Int @Word
|
||||||
|
((unsafeShiftL s 1) `xor` (unsafeShiftR s (finiteBitSize (undefined :: Word) - 1)))
|
||||||
|
|
||||||
|
-- | Encode a signed machine-sized integer with LEB-128. This uses
|
||||||
|
-- zig-zag encoding.
|
||||||
|
intLEB128 :: Int -> Builder
|
||||||
|
intLEB128 = wordLEB128 . zigZagNative
|
||||||
|
|
||||||
|
-- | Encode a machine-sized word with LEB-128.
|
||||||
|
wordLEB128 :: Word -> Builder
|
||||||
|
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)
|
||||||
|
|
||||||
|
-- | Encode a 64-bit word with LEB-128.
|
||||||
|
word64LEB128 :: Word64 -> Builder
|
||||||
|
word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w)
|
||||||
|
|
||||||
|
-- | Encode a signed arbitrary-precision integer as decimal.
|
||||||
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
|
-- are not preceded by anything.
|
||||||
|
integerDec :: Integer -> Builder
|
||||||
|
integerDec !i
|
||||||
|
| i < 0 = ascii '-' <> naturalDec (naturalFromInteger (negate i))
|
||||||
|
| otherwise = naturalDec (naturalFromInteger i)
|
||||||
|
|
||||||
|
-- | Encodes an unsigned arbitrary-precision integer as decimal.
|
||||||
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
|
naturalDec :: Natural -> Builder
|
||||||
|
naturalDec !n0 = fromEffect
|
||||||
|
(I# (11# +# (3# *# integerLog2# (naturalToInteger n0))))
|
||||||
|
(\marr off -> case n0 of
|
||||||
|
0 -> do
|
||||||
|
PM.writeByteArray marr off (0x30 :: Word8)
|
||||||
|
pure (off + 1)
|
||||||
|
_ -> go n0 marr off off
|
||||||
|
)
|
||||||
|
where
|
||||||
|
go :: forall s. Natural -> MutableByteArray s -> Int -> Int -> ST s Int
|
||||||
|
go !n !buf !off0 !off = case quotRem n 1_000_000_000 of
|
||||||
|
(q,r) -> case q of
|
||||||
|
0 -> do
|
||||||
|
off' <- backwardsWordLoop buf off (fromIntegral @Natural @Word r)
|
||||||
|
reverseBytes buf off0 (off' - 1)
|
||||||
|
pure off'
|
||||||
|
_ -> do
|
||||||
|
off' <- backwardsPasteWordPaddedDec9
|
||||||
|
(fromIntegral @Natural @Word r) buf off
|
||||||
|
go q buf off0 off'
|
||||||
|
|
||||||
|
-- Reverse the bytes in the designated slice. This takes
|
||||||
|
-- an inclusive start offset and an inclusive end offset.
|
||||||
|
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||||
|
{-# inline reverseBytes #-}
|
||||||
|
reverseBytes arr begin end = go begin end where
|
||||||
|
go ixA ixB = if ixA < ixB
|
||||||
|
then do
|
||||||
|
a :: Word8 <- PM.readByteArray arr ixA
|
||||||
|
b :: Word8 <- PM.readByteArray arr ixB
|
||||||
|
PM.writeByteArray arr ixA b
|
||||||
|
PM.writeByteArray arr ixB a
|
||||||
|
go (ixA + 1) (ixB - 1)
|
||||||
|
else pure ()
|
||||||
|
|
||||||
|
backwardsPasteWordPaddedDec9 ::
|
||||||
|
Word -> MutableByteArray s -> Int -> ST s Int
|
||||||
|
backwardsPasteWordPaddedDec9 !w !arr !off = do
|
||||||
|
backwardsPutRem10
|
||||||
|
(backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $
|
||||||
|
backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $
|
||||||
|
backwardsPutRem10 $ backwardsPutRem10
|
||||||
|
(\_ _ _ -> pure ())
|
||||||
|
) arr off w
|
||||||
|
pure (off + 9)
|
||||||
|
|
||||||
|
backwardsPutRem10 ::
|
||||||
|
(MutableByteArray s -> Int -> Word -> ST s a)
|
||||||
|
-> MutableByteArray s -> Int -> Word -> ST s a
|
||||||
|
{-# inline backwardsPutRem10 #-}
|
||||||
|
backwardsPutRem10 andThen arr off dividend = do
|
||||||
|
let quotient = approxDiv10 dividend
|
||||||
|
remainder = dividend - (10 * quotient)
|
||||||
|
PM.writeByteArray arr off (unsafeWordToWord8 (remainder + 48))
|
||||||
|
andThen arr (off + 1) quotient
|
||||||
|
|
||||||
|
backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||||
|
{-# inline backwardsWordLoop #-}
|
||||||
|
backwardsWordLoop arr off0 x0 = go off0 x0 where
|
||||||
|
go !off !(x :: Word) = if x > 0
|
||||||
|
then do
|
||||||
|
let (y,z) = quotRem x 10
|
||||||
|
PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||||
|
go (off + 1) y
|
||||||
|
else pure off
|
||||||
|
|
||||||
|
-- Based on C code from https://stackoverflow.com/a/5558614
|
||||||
|
-- For numbers less than 1073741829, this gives a correct answer.
|
||||||
|
approxDiv10 :: Word -> Word
|
||||||
|
approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32
|
||||||
|
|
||||||
|
-- -- A weird beast useful for rewrite rules. Not yet used. This will
|
||||||
|
-- -- ultimately replace fromEffect and fromBounded.
|
||||||
|
-- require :: Int -> Builder
|
||||||
|
-- require !n = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
|
-- let !(I# req) = n
|
||||||
|
-- in case len0 >=# req of
|
||||||
|
-- 1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
-- _ -> let !(I# lenX) = max 4080 (I# req) in
|
||||||
|
-- case Exts.newByteArray# lenX s0 of
|
||||||
|
-- (# sX, bufX #) ->
|
||||||
|
-- (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||||
|
|
||||||
|
unsafeWordToWord8 :: Word -> Word8
|
||||||
|
unsafeWordToWord8 (W# w) = W8# w
|
||||||
|
|
|
@ -45,6 +45,8 @@ module Data.Bytes.Builder.Bounded
|
||||||
-- ** 64-bit
|
-- ** 64-bit
|
||||||
, word64PaddedLowerHex
|
, word64PaddedLowerHex
|
||||||
, word64PaddedUpperHex
|
, word64PaddedUpperHex
|
||||||
|
-- ** 48-bit
|
||||||
|
, word48PaddedLowerHex
|
||||||
-- ** 32-bit
|
-- ** 32-bit
|
||||||
, word32PaddedLowerHex
|
, word32PaddedLowerHex
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
|
@ -89,6 +91,9 @@ module Data.Bytes.Builder.Bounded
|
||||||
, int64LE
|
, int64LE
|
||||||
, int32LE
|
, int32LE
|
||||||
, int16LE
|
, int16LE
|
||||||
|
-- **** LEB128
|
||||||
|
, wordLEB128
|
||||||
|
, word64LEB128
|
||||||
-- * Encode Floating-Point Types
|
-- * Encode Floating-Point Types
|
||||||
, doubleDec
|
, doubleDec
|
||||||
) where
|
) where
|
||||||
|
@ -161,7 +166,10 @@ infixr 9 `append`
|
||||||
|
|
||||||
-- | Concatenate two builders.
|
-- | Concatenate two builders.
|
||||||
append :: Builder m -> Builder n -> Builder (m + n)
|
append :: Builder m -> Builder n -> Builder (m + n)
|
||||||
append (Builder f) (Builder g) =
|
append = unsafeAppend
|
||||||
|
|
||||||
|
unsafeAppend :: Builder m -> Builder n -> Builder p
|
||||||
|
unsafeAppend (Builder f) (Builder g) =
|
||||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||||
(# s1, r #) -> g arr r s1
|
(# s1, r #) -> g arr r s1
|
||||||
|
|
||||||
|
@ -286,15 +294,20 @@ wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
|
||||||
|
|
||||||
internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||||
{-# inline internalWordLoop #-}
|
{-# inline internalWordLoop #-}
|
||||||
internalWordLoop arr off0 x0 = go off0 x0 where
|
internalWordLoop arr off0 x0 = do
|
||||||
|
off1 <- backwardsWordLoop arr off0 x0
|
||||||
|
reverseBytes arr off0 (off1 - 1)
|
||||||
|
pure off1
|
||||||
|
|
||||||
|
backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int
|
||||||
|
{-# inline backwardsWordLoop #-}
|
||||||
|
backwardsWordLoop arr off0 x0 = go off0 x0 where
|
||||||
go !off !(x :: Word) = if x > 0
|
go !off !(x :: Word) = if x > 0
|
||||||
then do
|
then do
|
||||||
let (y,z) = quotRem x 10
|
let (y,z) = quotRem x 10
|
||||||
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
writeByteArray arr off (fromIntegral (z + 0x30) :: Word8)
|
||||||
go (off + 1) y
|
go (off + 1) y
|
||||||
else do
|
else pure off
|
||||||
reverseBytes arr off0 (off - 1)
|
|
||||||
pure off
|
|
||||||
|
|
||||||
-- Requires up to 20 bytes. Can be less depending on what the
|
-- Requires up to 20 bytes. Can be less depending on what the
|
||||||
-- size of the argument is known to be. Unsafe.
|
-- size of the argument is known to be. Unsafe.
|
||||||
|
@ -390,6 +403,14 @@ word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w
|
||||||
word64PaddedLowerHex :: Word64 -> Builder 16
|
word64PaddedLowerHex :: Word64 -> Builder 16
|
||||||
word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# w
|
word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# w
|
||||||
|
|
||||||
|
-- | Requires exactly 12 bytes. Discards the upper 16 bits of a
|
||||||
|
-- 64-bit unsigned integer and then encodes the lower 48 bits as
|
||||||
|
-- hexadecimal, zero-padding the encoding to 12 digits. This uses
|
||||||
|
-- lowercase for the alphabetical digits. For example, this encodes the
|
||||||
|
-- number 1022 as @0000000003fe@.
|
||||||
|
word48PaddedLowerHex :: Word64 -> Builder 12
|
||||||
|
word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# w
|
||||||
|
|
||||||
-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as
|
-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as
|
||||||
-- hexadecimal, zero-padding the encoding to 8 digits. This uses
|
-- hexadecimal, zero-padding the encoding to 8 digits. This uses
|
||||||
-- uppercase for the alphabetical digits.
|
-- uppercase for the alphabetical digits.
|
||||||
|
@ -481,6 +502,27 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
where
|
where
|
||||||
w = W# w#
|
w = W# w#
|
||||||
|
|
||||||
|
-- TODO: Is it actually worth unrolling this loop. I suspect that it
|
||||||
|
-- might not be. Benchmark this.
|
||||||
|
word48PaddedLowerHex# :: Word# -> Builder 12
|
||||||
|
{-# noinline word48PaddedLowerHex# #-}
|
||||||
|
word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (toHexLower (unsafeShiftR w 44))
|
||||||
|
writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 40))
|
||||||
|
writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 36))
|
||||||
|
writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 32))
|
||||||
|
writeByteArray arr (off + 4) (toHexLower (unsafeShiftR w 28))
|
||||||
|
writeByteArray arr (off + 5) (toHexLower (unsafeShiftR w 24))
|
||||||
|
writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 20))
|
||||||
|
writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 16))
|
||||||
|
writeByteArray arr (off + 8) (toHexLower (unsafeShiftR w 12))
|
||||||
|
writeByteArray arr (off + 9) (toHexLower (unsafeShiftR w 8))
|
||||||
|
writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 4))
|
||||||
|
writeByteArray arr (off + 11) (toHexLower w)
|
||||||
|
pure (off + 12)
|
||||||
|
where
|
||||||
|
w = W# w#
|
||||||
|
|
||||||
-- TODO: Is it actually worth unrolling this loop. I suspect that it
|
-- TODO: Is it actually worth unrolling this loop. I suspect that it
|
||||||
-- might not be. Benchmark this.
|
-- might not be. Benchmark this.
|
||||||
word64PaddedLowerHex# :: Word# -> Builder 16
|
word64PaddedLowerHex# :: Word# -> Builder 16
|
||||||
|
@ -752,6 +794,22 @@ ascii6 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) = Unsafe.construct $ \(Mu
|
||||||
primitive_ (writeCharArray# arr (off +# 5# ) c5)
|
primitive_ (writeCharArray# arr (off +# 5# ) c5)
|
||||||
pure (I# (off +# 6# ))
|
pure (I# (off +# 6# ))
|
||||||
|
|
||||||
|
-- | Encode a machine-sized word with LEB-128.
|
||||||
|
wordLEB128 :: Word -> Builder 10
|
||||||
|
wordLEB128 (W# w) = lebCommon (W# w)
|
||||||
|
|
||||||
|
-- | Encode a 64-bit word with LEB-128.
|
||||||
|
word64LEB128 :: Word64 -> Builder 10
|
||||||
|
word64LEB128 (W64# w) = lebCommon (W# w)
|
||||||
|
|
||||||
|
lebCommon :: Word -> Builder n
|
||||||
|
lebCommon !w = case quotRem w 128 of
|
||||||
|
(q,r) -> case q of
|
||||||
|
0 -> unsafeWord8 (unsafeWordToWord8 r)
|
||||||
|
_ -> unsafeAppend
|
||||||
|
(unsafeWord8 (unsafeWordToWord8 (r .|. 0x80)))
|
||||||
|
(lebCommon q)
|
||||||
|
|
||||||
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
||||||
char :: Char -> Builder 4
|
char :: Char -> Builder 4
|
||||||
char c
|
char c
|
||||||
|
@ -913,6 +971,11 @@ word8 w = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off w
|
writeByteArray arr off w
|
||||||
pure (off + 1)
|
pure (off + 1)
|
||||||
|
|
||||||
|
unsafeWord8 :: Word8 -> Builder n
|
||||||
|
unsafeWord8 w = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off w
|
||||||
|
pure (off + 1)
|
||||||
|
|
||||||
-- Reverse the bytes in the designated slice. This takes
|
-- Reverse the bytes in the designated slice. This takes
|
||||||
-- an inclusive start offset and an inclusive end offset.
|
-- an inclusive start offset and an inclusive end offset.
|
||||||
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
|
||||||
|
|
32
test/Main.hs
32
test/Main.hs
|
@ -15,13 +15,17 @@ import Data.Char (ord,chr)
|
||||||
import Data.IORef (IORef,newIORef,readIORef,writeIORef)
|
import Data.IORef (IORef,newIORef,readIORef,writeIORef)
|
||||||
import Data.Primitive (ByteArray)
|
import Data.Primitive (ByteArray)
|
||||||
import Data.WideWord (Word128(Word128),Word256(Word256))
|
import Data.WideWord (Word128(Word128),Word256(Word256))
|
||||||
|
import Numeric.Natural (Natural)
|
||||||
import Test.Tasty (defaultMain,testGroup,TestTree)
|
import Test.Tasty (defaultMain,testGroup,TestTree)
|
||||||
import Test.QuickCheck ((===),Arbitrary)
|
import Test.QuickCheck ((===),Arbitrary)
|
||||||
|
import Test.QuickCheck.Instances.Natural ()
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Test.Tasty.HUnit ((@=?))
|
import Test.Tasty.HUnit ((@=?))
|
||||||
|
|
||||||
import qualified Arithmetic.Nat as Nat
|
import qualified Arithmetic.Nat as Nat
|
||||||
|
import qualified Data.Bits as Bits
|
||||||
import qualified Data.Bytes.Builder.Bounded as Bounded
|
import qualified Data.Bytes.Builder.Bounded as Bounded
|
||||||
|
import qualified Data.Bytes as Bytes
|
||||||
import qualified Data.ByteString as ByteString
|
import qualified Data.ByteString as ByteString
|
||||||
import qualified Data.ByteString.Builder as BB
|
import qualified Data.ByteString.Builder as BB
|
||||||
import qualified Data.ByteString.Lazy.Char8 as LB
|
import qualified Data.ByteString.Lazy.Char8 as LB
|
||||||
|
@ -215,6 +219,19 @@ tests = testGroup "Tests"
|
||||||
in runConcat 1 (foldMap word256BE xs)
|
in runConcat 1 (foldMap word256BE xs)
|
||||||
===
|
===
|
||||||
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
|
runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs))
|
||||||
|
, TQC.testProperty "word64LEB128" $ \(x :: Word64) ->
|
||||||
|
runConcat 1 (word64LEB128 x)
|
||||||
|
===
|
||||||
|
naiveLeb128 (fromIntegral x)
|
||||||
|
, TQC.testProperty "naturalDec-A" $ \(x :: Natural) ->
|
||||||
|
runConcat 1 (naturalDec x)
|
||||||
|
===
|
||||||
|
pack (show x)
|
||||||
|
, TQC.testProperty "naturalDec-B" $ \(x :: Natural) ->
|
||||||
|
let y = 1234567892345678934678987654321 * x in
|
||||||
|
runConcat 1 (naturalDec y)
|
||||||
|
===
|
||||||
|
pack (show y)
|
||||||
]
|
]
|
||||||
, testGroup "alternate"
|
, testGroup "alternate"
|
||||||
[ TQC.testProperty "HexWord64" $ \x y ->
|
[ TQC.testProperty "HexWord64" $ \x y ->
|
||||||
|
@ -306,3 +323,18 @@ zeroPadL :: Int -> String -> String
|
||||||
zeroPadL n s
|
zeroPadL n s
|
||||||
| length s < n = replicate (n - length s) '0' ++ s
|
| length s < n = replicate (n - length s) '0' ++ s
|
||||||
| otherwise = s
|
| otherwise = s
|
||||||
|
|
||||||
|
naiveLeb128 :: Natural -> ByteArray
|
||||||
|
naiveLeb128 x =
|
||||||
|
Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)))
|
||||||
|
where
|
||||||
|
go !xs !n =
|
||||||
|
let (q,r) = quotRem n 128
|
||||||
|
r' = fromIntegral @Natural @Word8 r
|
||||||
|
w = if q == 0
|
||||||
|
then r'
|
||||||
|
else Bits.setBit r' 7
|
||||||
|
xs' = w : xs
|
||||||
|
in if q == 0
|
||||||
|
then L.reverse xs'
|
||||||
|
else go xs' q
|
||||||
|
|
Loading…
Reference in a new issue