Release 0.3.16.0 with Avro module and a few minor additions
This commit is contained in:
parent
50dbfc111d
commit
a5cdbf965f
5 changed files with 108 additions and 2 deletions
|
@ -5,6 +5,13 @@ Note: Prior to version 0.3.4.0, this library was named
|
||||||
`small-bytearray-builder` is now just a compatibility shim
|
`small-bytearray-builder` is now just a compatibility shim
|
||||||
to ease the migration process.
|
to ease the migration process.
|
||||||
|
|
||||||
|
## 0.3.16.0 -- 2024-01-29
|
||||||
|
|
||||||
|
* Add `wordPaddedDec3`.
|
||||||
|
* Add `Data.Bytes.Builder.Avro`.
|
||||||
|
* Add `word16LEB128`.
|
||||||
|
* Stop accepting versions of text lower than 2.0.
|
||||||
|
|
||||||
## 0.3.15.0 -- 2024-01-05
|
## 0.3.15.0 -- 2024-01-05
|
||||||
|
|
||||||
* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need
|
* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
cabal-version: 2.2
|
cabal-version: 2.2
|
||||||
name: bytebuild
|
name: bytebuild
|
||||||
version: 0.3.15.0
|
version: 0.3.16.0
|
||||||
synopsis: Build byte arrays
|
synopsis: Build byte arrays
|
||||||
description:
|
description:
|
||||||
This is similar to the builder facilities provided by
|
This is similar to the builder facilities provided by
|
||||||
|
@ -37,6 +37,7 @@ flag checked
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Data.Bytes.Builder
|
Data.Bytes.Builder
|
||||||
|
Data.Bytes.Builder.Avro
|
||||||
Data.Bytes.Builder.Class
|
Data.Bytes.Builder.Class
|
||||||
Data.Bytes.Builder.Template
|
Data.Bytes.Builder.Template
|
||||||
Data.Bytes.Builder.Unsafe
|
Data.Bytes.Builder.Unsafe
|
||||||
|
@ -58,7 +59,7 @@ library
|
||||||
, primitive-offset >=0.2 && <0.3
|
, primitive-offset >=0.2 && <0.3
|
||||||
, run-st >=0.1.2 && <0.2
|
, run-st >=0.1.2 && <0.2
|
||||||
, template-haskell >=2.16
|
, template-haskell >=2.16
|
||||||
, text >=1.2 && <2.2
|
, text >=2.0 && <2.2
|
||||||
, text-short >=0.1.3 && <0.2
|
, text-short >=0.1.3 && <0.2
|
||||||
, wide-word >=0.1.0.9 && <0.2
|
, wide-word >=0.1.0.9 && <0.2
|
||||||
, zigzag
|
, zigzag
|
||||||
|
|
|
@ -105,6 +105,7 @@ module Data.Bytes.Builder
|
||||||
, int32LEB128
|
, int32LEB128
|
||||||
, int64LEB128
|
, int64LEB128
|
||||||
, wordLEB128
|
, wordLEB128
|
||||||
|
, word16LEB128
|
||||||
, word32LEB128
|
, word32LEB128
|
||||||
, word64LEB128
|
, word64LEB128
|
||||||
-- **** VLQ
|
-- **** VLQ
|
||||||
|
@ -1219,6 +1220,11 @@ wordLEB128 :: Word -> Builder
|
||||||
{-# inline wordLEB128 #-}
|
{-# inline wordLEB128 #-}
|
||||||
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)
|
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)
|
||||||
|
|
||||||
|
-- | Encode a 16-bit word with LEB-128.
-- Thin wrapper: the bounded builder 'Bounded.word16LEB128' is lifted to an
-- unbounded 'Builder' with 'fromBounded'.
|
||||||
|
word16LEB128 :: Word16 -> Builder
|
||||||
|
{-# inline word16LEB128 #-}
|
||||||
|
word16LEB128 w = fromBounded Nat.constant (Bounded.word16LEB128 w)
|
||||||
|
|
||||||
-- | Encode a 32-bit word with LEB-128.
|
-- | Encode a 32-bit word with LEB-128.
|
||||||
word32LEB128 :: Word32 -> Builder
|
word32LEB128 :: Word32 -> Builder
|
||||||
{-# inline word32LEB128 #-}
|
{-# inline word32LEB128 #-}
|
||||||
|
|
77
src/Data/Bytes/Builder/Avro.hs
Normal file
77
src/Data/Bytes/Builder/Avro.hs
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
{-# language BangPatterns #-}
|
||||||
|
|
||||||
|
-- | Builders for encoding data with Apache Avro. Most functions in this
|
||||||
|
-- module are just aliases for other functions. Avro uses zig-zag LEB128
|
||||||
|
-- for all integral types.
|
||||||
|
module Data.Bytes.Builder.Avro
|
||||||
|
( int
|
||||||
|
, int32
|
||||||
|
, int64
|
||||||
|
, word16
|
||||||
|
, word32
|
||||||
|
, word128
|
||||||
|
, bytes
|
||||||
|
, chunks
|
||||||
|
, text
|
||||||
|
-- * Maps
|
||||||
|
, map2
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Int
|
||||||
|
import Data.Word
|
||||||
|
import Data.Bytes.Builder (Builder)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Bytes (Bytes)
|
||||||
|
import Data.WideWord (Word128)
|
||||||
|
import Data.Bytes.Chunks (Chunks)
|
||||||
|
|
||||||
|
import qualified Data.Bytes as Bytes
|
||||||
|
import qualified Data.Bytes.Chunks as Chunks
|
||||||
|
import qualified Data.Bytes.Builder as B
|
||||||
|
import qualified Data.Bytes.Text.Utf8 as Utf8
|
||||||
|
|
||||||
|
-- | Encode a 32-bit signed integer. This is an alias for 'B.int32LEB128'.
int32 :: Int32 -> Builder
|
||||||
|
int32 = B.int32LEB128
|
||||||
|
|
||||||
|
-- | Encode a 64-bit signed integer. This is an alias for 'B.int64LEB128'.
int64 :: Int64 -> Builder
|
||||||
|
int64 = B.int64LEB128
|
||||||
|
|
||||||
|
-- | Encode a machine-sized signed integer. This is an alias for
-- 'B.intLEB128'.
int :: Int -> Builder
|
||||||
|
int = B.intLEB128
|
||||||
|
|
||||||
|
-- | Note: This results in a zigzag encoded number. Avro does not have
|
||||||
|
-- unsigned types.
--
-- The value is widened with 'fromIntegral' and encoded by 'B.int32LEB128'.
-- Every 'Word16' fits in an 'Int32', so the widening cannot wrap.
|
||||||
|
word16 :: Word16 -> Builder
|
||||||
|
word16 = B.int32LEB128 . fromIntegral
|
||||||
|
|
||||||
|
-- | Note: This results in a zigzag encoded number. Avro does not have
|
||||||
|
-- unsigned types.
--
-- The value is widened with 'fromIntegral' and encoded by 'B.int64LEB128'
-- rather than the 32-bit encoder: a 'Word32' can exceed the range of
-- 'Int32' but always fits in an 'Int64', so the widening cannot wrap.
|
||||||
|
word32 :: Word32 -> Builder
|
||||||
|
word32 = B.int64LEB128 . fromIntegral
|
||||||
|
|
||||||
|
-- | Note: This results in a @fixed@ encoded value of length 16. In the
|
||||||
|
-- schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@.
|
||||||
|
-- A big-endian encoding is used.
-- This is an alias for 'B.word128BE'; no length prefix is added here.
|
||||||
|
word128 :: Word128 -> Builder
|
||||||
|
word128 = B.word128BE
|
||||||
|
|
||||||
|
-- | Encode a byte sequence as its length (encoded with 'int') followed by
-- the bytes themselves. The bang pattern forces the argument to WHNF.
bytes :: Bytes -> Builder
|
||||||
|
bytes !b = int (Bytes.length b) <> B.bytes b
|
||||||
|
|
||||||
|
-- | Encode chunked bytes as their total length (as reported by
-- 'Chunks.length' and encoded with 'int') followed by the chunk contents.
-- The bang pattern forces the argument to WHNF.
chunks :: Chunks -> Builder
|
||||||
|
chunks !b = int (Chunks.length b) <> B.chunks b
|
||||||
|
|
||||||
|
-- | Encode text by converting it with 'Utf8.fromText' and then encoding the
-- resulting bytes with 'bytes', so the length prefix counts bytes of the
-- converted value rather than characters.
text :: Text -> Builder
|
||||||
|
text = bytes . Utf8.fromText
|
||||||
|
|
||||||
|
-- | Encode a map with exactly two key-value pairs. The keys are text.
|
||||||
|
-- This is commonly used to encode the header in an avro file, which has
|
||||||
|
-- a map with two keys: @avro.schema@ and @avro.codec@.
--
-- The output is the byte @0x04@, then each key (encoded with 'text')
-- immediately followed by its already-encoded value, then the byte @0x00@.
-- NOTE(review): @0x04@ is presumably the zig-zag encoding of the block
-- count 2 and @0x00@ the zero count that ends the map; confirm against the
-- Avro specification.
|
||||||
|
map2 ::
|
||||||
|
Text -- ^ First key
|
||||||
|
-> Builder -- ^ First value (already encoded)
|
||||||
|
-> Text -- ^ Second key
|
||||||
|
-> Builder -- ^ Second value (already encoded)
|
||||||
|
-> Builder
|
||||||
|
{-# inline map2 #-}
|
||||||
|
map2 k1 v1 k2 v2 = B.word8 0x04 <> text k1 <> v1 <> text k2 <> v2 <> B.word8 0x00
|
|
@ -74,6 +74,7 @@ module Data.Bytes.Builder.Bounded
|
||||||
, char
|
, char
|
||||||
-- ** Native
|
-- ** Native
|
||||||
, wordPaddedDec2
|
, wordPaddedDec2
|
||||||
|
, wordPaddedDec3
|
||||||
, wordPaddedDec4
|
, wordPaddedDec4
|
||||||
, wordPaddedDec9
|
, wordPaddedDec9
|
||||||
-- ** Machine-Readable
|
-- ** Machine-Readable
|
||||||
|
@ -102,6 +103,7 @@ module Data.Bytes.Builder.Bounded
|
||||||
-- with the high bit of each output byte set to 1 in all bytes except for
|
-- with the high bit of each output byte set to 1 in all bytes except for
|
||||||
-- the final byte.
|
-- the final byte.
|
||||||
, wordLEB128
|
, wordLEB128
|
||||||
|
, word16LEB128
|
||||||
, word32LEB128
|
, word32LEB128
|
||||||
, word64LEB128
|
, word64LEB128
|
||||||
-- **** VLQ
|
-- **** VLQ
|
||||||
|
@ -788,6 +790,14 @@ wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do
|
||||||
) arr (off + 3) w
|
) arr (off + 3) w
|
||||||
pure (off + 4)
|
pure (off + 4)
|
||||||
|
|
||||||
|
-- | Encode a number less than 1e3 as a decimal number, zero-padding it to
-- three digits. For example: 0 is encoded as @000@ and 5 is encoded as
-- @005@.
--
-- Three chained 'putRem10' steps start at offset @off + 2@, and the builder
-- reports @off + 3@ as the new offset.
-- NOTE(review): the description above is by analogy with 'wordPaddedDec4'
-- and 'wordPaddedDec9'; 'putRem10' is not visible here, so confirm the
-- write order and what happens for inputs of 1000 or more.
wordPaddedDec3 :: Word -> Builder 3
|
||||||
|
wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do
|
||||||
|
putRem10
|
||||||
|
(putRem10 $ putRem10
|
||||||
|
(\_ _ _ -> pure ())
|
||||||
|
) arr (off + 2) w
|
||||||
|
pure (off + 3)
|
||||||
|
|
||||||
-- | Encode a number less than 1e9 as a decimal number, zero-padding it to
|
-- | Encode a number less than 1e9 as a decimal number, zero-padding it to
|
||||||
-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as
|
-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as
|
||||||
-- @000000005@.
|
-- @000000005@.
|
||||||
|
@ -923,6 +933,11 @@ wordLEB128 :: Word -> Builder 10
|
||||||
{-# inline wordLEB128 #-}
|
{-# inline wordLEB128 #-}
|
||||||
wordLEB128 (W# w) = lebCommon (W# w)
|
wordLEB128 (W# w) = lebCommon (W# w)
|
||||||
|
|
||||||
|
-- | Encode a 16-bit word with LEB-128.
-- The value is widened to a machine 'Word' and handed to 'lebCommon'.
-- The bound of 3 in the result type matches the three 7-bit groups that are
-- enough to cover 16 bits.
|
||||||
|
word16LEB128 :: Word16 -> Builder 3
|
||||||
|
{-# inline word16LEB128 #-}
|
||||||
|
word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w))
|
||||||
|
|
||||||
-- | Encode a 32-bit word with LEB-128.
|
-- | Encode a 32-bit word with LEB-128.
|
||||||
word32LEB128 :: Word32 -> Builder 5
|
word32LEB128 :: Word32 -> Builder 5
|
||||||
{-# inline word32LEB128 #-}
|
{-# inline word32LEB128 #-}
|
||||||
|
|
Loading…
Reference in a new issue