Release 0.3.16.0 with Avro module and a few minor additions

This commit is contained in:
Andrew Martin 2024-01-29 17:08:31 -05:00
parent 50dbfc111d
commit a5cdbf965f
5 changed files with 108 additions and 2 deletions

View file

@ -5,6 +5,13 @@ Note: Prior to version 0.3.4.0, this library was named
`small-bytearray-builder` is now just a compatibility shim
to ease the migration process.
## 0.3.16.0 -- 2024-01-29
* Add `wordPaddedDec3`.
* Add `Data.Bytes.Builder.Avro`.
* Add `word16LEB128`.
* Stop accepting versions of text lower than 2.0.
## 0.3.15.0 -- 2024-01-05
* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need

View file

@ -1,6 +1,6 @@
cabal-version: 2.2
name: bytebuild
version: 0.3.15.0
version: 0.3.16.0
synopsis: Build byte arrays
description:
This is similar to the builder facilities provided by
@ -37,6 +37,7 @@ flag checked
library
exposed-modules:
Data.Bytes.Builder
Data.Bytes.Builder.Avro
Data.Bytes.Builder.Class
Data.Bytes.Builder.Template
Data.Bytes.Builder.Unsafe
@ -58,7 +59,7 @@ library
, primitive-offset >=0.2 && <0.3
, run-st >=0.1.2 && <0.2
, template-haskell >=2.16
, text >=1.2 && <2.2
, text >=2.0 && <2.2
, text-short >=0.1.3 && <0.2
, wide-word >=0.1.0.9 && <0.2
, zigzag

View file

@ -105,6 +105,7 @@ module Data.Bytes.Builder
, int32LEB128
, int64LEB128
, wordLEB128
, word16LEB128
, word32LEB128
, word64LEB128
-- **** VLQ
@ -1219,6 +1220,11 @@ wordLEB128 :: Word -> Builder
{-# inline wordLEB128 #-}
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)
-- | LEB-128 encoding of a 16-bit word. This wraps the bounded builder
-- 'Bounded.word16LEB128' so that it can be used as an unbounded 'Builder'.
word16LEB128 :: Word16 -> Builder
{-# inline word16LEB128 #-}
word16LEB128 w = fromBounded Nat.constant bounded
  where
  bounded = Bounded.word16LEB128 w
-- | Encode a 32-bit word with LEB-128.
word32LEB128 :: Word32 -> Builder
{-# inline word32LEB128 #-}

View file

@ -0,0 +1,77 @@
{-# language BangPatterns #-}
-- | Builders for encoding data with Apache Avro. Most functions in this
-- module are just aliases for other functions. Avro uses zig-zag LEB128
-- for all integral types.
module Data.Bytes.Builder.Avro
( int
, int32
, int64
, word16
, word32
, word128
, bytes
, chunks
, text
-- * Maps
, map2
) where
import Data.Int
import Data.Word
import Data.Bytes.Builder (Builder)
import Data.Text (Text)
import Data.Bytes (Bytes)
import Data.WideWord (Word128)
import Data.Bytes.Chunks (Chunks)
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Chunks as Chunks
import qualified Data.Bytes.Builder as B
import qualified Data.Bytes.Text.Utf8 as Utf8
-- | Encode a signed 32-bit integer. This is an alias for
-- 'B.int32LEB128'; it is the encoding to use for the Avro @int@ type.
int32 :: Int32 -> Builder
int32 n = B.int32LEB128 n
-- | Encode a signed 64-bit integer. This is an alias for
-- 'B.int64LEB128'; it is the encoding to use for the Avro @long@ type.
int64 :: Int64 -> Builder
int64 n = B.int64LEB128 n
-- | Encode a machine-sized signed integer. This is an alias for
-- 'B.intLEB128'. Within this module it is used to write the length
-- prefix of 'bytes' and 'chunks'.
int :: Int -> Builder
int n = B.intLEB128 n
-- | Encode an unsigned 16-bit word. Avro has no unsigned types, so the
-- value is first widened to a signed 32-bit integer and then written
-- with the same zigzag encoding as 'int32'.
word16 :: Word16 -> Builder
word16 w = B.int32LEB128 (fromIntegral w)
-- | Encode an unsigned 32-bit word. Avro has no unsigned types, so the
-- value is first widened to a signed 64-bit integer and then written
-- with the same zigzag encoding as 'int64'.
word32 :: Word32 -> Builder
word32 w = B.int64LEB128 (fromIntegral w)
-- | Encode a 128-bit word as an Avro @fixed@ value of length 16, using
-- a big-endian byte order. The corresponding schema type must be
-- @{"type": "fixed", "name": "...", "size": 16}@.
word128 :: Word128 -> Builder
word128 w = B.word128BE w
-- | Encode a byte sequence as its length (written with 'int') followed
-- by the bytes themselves. This is the layout of the Avro @bytes@ type.
bytes :: Bytes -> Builder
bytes !payload = lengthPrefix <> B.bytes payload
  where
  lengthPrefix = int (Bytes.length payload)
-- | Encode chunked bytes as their total length (written with 'int')
-- followed by the contents of the chunks. This is the 'Chunks'
-- counterpart of 'bytes'.
chunks :: Chunks -> Builder
chunks !payload = lengthPrefix <> B.chunks payload
  where
  lengthPrefix = int (Chunks.length payload)
-- | Encode text as its UTF-8 bytes, length-prefixed in the same way as
-- 'bytes'. This is the layout of the Avro @string@ type.
text :: Text -> Builder
text t = bytes (Utf8.fromText t)
-- | Encode a map that has exactly two entries, both keyed by text. The
-- values must already be encoded by the caller. A typical use is the
-- header of an avro file, whose metadata map holds the two keys
-- @avro.schema@ and @avro.codec@.
map2 ::
     Text -- ^ First key
  -> Builder -- ^ First value (already encoded)
  -> Text -- ^ Second key
  -> Builder -- ^ Second value (already encoded)
  -> Builder
{-# inline map2 #-}
map2 keyA valA keyB valB =
     B.word8 0x04 -- block count: 2, zigzag-encoded as a single byte
  <> text keyA
  <> valA
  <> text keyB
  <> valB
  <> B.word8 0x00 -- block count: 0, which terminates the map

View file

@ -74,6 +74,7 @@ module Data.Bytes.Builder.Bounded
, char
-- ** Native
, wordPaddedDec2
, wordPaddedDec3
, wordPaddedDec4
, wordPaddedDec9
-- ** Machine-Readable
@ -102,6 +103,7 @@ module Data.Bytes.Builder.Bounded
-- with the high bit of each output byte set to 1 in all bytes except for
-- the final byte.
, wordLEB128
, word16LEB128
, word32LEB128
, word64LEB128
-- **** VLQ
@ -788,6 +790,14 @@ wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do
) arr (off + 3) w
pure (off + 4)
-- | Encode a number less than 1000 as a decimal number, zero-padding it to
-- three digits. For example: 0 is encoded as @000@ and 5 is encoded as
-- @005@.
--
-- NOTE(review): no range check is visible here; presumably only the low
-- three decimal digits are written for larger inputs. Confirm against
-- @putRem10@.
wordPaddedDec3 :: Word -> Builder 3
wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do
-- Three chained putRem10 steps, started at the last of the three output
-- positions (off + 2); the innermost continuation does nothing.
putRem10
(putRem10 $ putRem10
(\_ _ _ -> pure ())
) arr (off + 2) w
-- Exactly three bytes are consumed, matching the bound in the type.
pure (off + 3)
-- | Encode a number less than 1e9 as a decimal number, zero-padding it to
-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as
-- @000000005@.
@ -923,6 +933,11 @@ wordLEB128 :: Word -> Builder 10
{-# inline wordLEB128 #-}
wordLEB128 (W# w) = lebCommon (W# w)
-- | Encode a 16-bit word with LEB-128. At seven payload bits per output
-- byte, a 16-bit value needs at most three bytes, which is the bound
-- given in the type.
word16LEB128 :: Word16 -> Builder 3
{-# inline word16LEB128 #-}
-- Widen the 16-bit value to a machine word and hand it to 'lebCommon'.
word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w))
-- | Encode a 32-bit word with LEB-128.
word32LEB128 :: Word32 -> Builder 5
{-# inline word32LEB128 #-}