Release 0.3.16.0 with Avro module and a few minor additions
This commit is contained in:
parent
50dbfc111d
commit
a5cdbf965f
5 changed files with 108 additions and 2 deletions
|
@ -5,6 +5,13 @@ Note: Prior to version 0.3.4.0, this library was named
|
|||
`small-bytearray-builder` is now just a compatibility shim
|
||||
to ease the migration process.
|
||||
|
||||
## 0.3.16.0 -- 2024-01-29
|
||||
|
||||
* Add `wordPaddedDec3`.
|
||||
* Add `Data.Bytes.Builder.Avro`.
|
||||
* Add `word16LEB128`.
|
||||
* Stop accepting versions of text lower than 2.0.
|
||||
|
||||
## 0.3.15.0 -- 2024-01-05
|
||||
|
||||
* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
cabal-version: 2.2
|
||||
name: bytebuild
|
||||
version: 0.3.15.0
|
||||
version: 0.3.16.0
|
||||
synopsis: Build byte arrays
|
||||
description:
|
||||
This is similar to the builder facilities provided by
|
||||
|
@ -37,6 +37,7 @@ flag checked
|
|||
library
|
||||
exposed-modules:
|
||||
Data.Bytes.Builder
|
||||
Data.Bytes.Builder.Avro
|
||||
Data.Bytes.Builder.Class
|
||||
Data.Bytes.Builder.Template
|
||||
Data.Bytes.Builder.Unsafe
|
||||
|
@ -58,7 +59,7 @@ library
|
|||
, primitive-offset >=0.2 && <0.3
|
||||
, run-st >=0.1.2 && <0.2
|
||||
, template-haskell >=2.16
|
||||
, text >=1.2 && <2.2
|
||||
, text >=2.0 && <2.2
|
||||
, text-short >=0.1.3 && <0.2
|
||||
, wide-word >=0.1.0.9 && <0.2
|
||||
, zigzag
|
||||
|
|
|
@ -105,6 +105,7 @@ module Data.Bytes.Builder
|
|||
, int32LEB128
|
||||
, int64LEB128
|
||||
, wordLEB128
|
||||
, word16LEB128
|
||||
, word32LEB128
|
||||
, word64LEB128
|
||||
-- **** VLQ
|
||||
|
@ -1219,6 +1220,11 @@ wordLEB128 :: Word -> Builder
|
|||
{-# inline wordLEB128 #-}
|
||||
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)
|
||||
|
||||
-- | Encode a 16-bit word with LEB-128.
|
||||
word16LEB128 :: Word16 -> Builder
|
||||
{-# inline word16LEB128 #-}
|
||||
word16LEB128 w = fromBounded Nat.constant (Bounded.word16LEB128 w)
|
||||
|
||||
-- | Encode a 32-bit word with LEB-128.
|
||||
word32LEB128 :: Word32 -> Builder
|
||||
{-# inline word32LEB128 #-}
|
||||
|
|
77
src/Data/Bytes/Builder/Avro.hs
Normal file
77
src/Data/Bytes/Builder/Avro.hs
Normal file
|
@ -0,0 +1,77 @@
|
|||
{-# language BangPatterns #-}
|
||||
|
||||
-- | Builders for encoding data with Apache Avro. Most functions in this
|
||||
-- module are just aliases for other functions. Avro uses zig-zag LEB128
|
||||
-- for all integral types.
|
||||
module Data.Bytes.Builder.Avro
|
||||
( int
|
||||
, int32
|
||||
, int64
|
||||
, word16
|
||||
, word32
|
||||
, word128
|
||||
, bytes
|
||||
, chunks
|
||||
, text
|
||||
-- * Maps
|
||||
, map2
|
||||
) where
|
||||
|
||||
import Data.Int
|
||||
import Data.Word
|
||||
import Data.Bytes.Builder (Builder)
|
||||
import Data.Text (Text)
|
||||
import Data.Bytes (Bytes)
|
||||
import Data.WideWord (Word128)
|
||||
import Data.Bytes.Chunks (Chunks)
|
||||
|
||||
import qualified Data.Bytes as Bytes
|
||||
import qualified Data.Bytes.Chunks as Chunks
|
||||
import qualified Data.Bytes.Builder as B
|
||||
import qualified Data.Bytes.Text.Utf8 as Utf8
|
||||
|
||||
int32 :: Int32 -> Builder
|
||||
int32 = B.int32LEB128
|
||||
|
||||
int64 :: Int64 -> Builder
|
||||
int64 = B.int64LEB128
|
||||
|
||||
int :: Int -> Builder
|
||||
int = B.intLEB128
|
||||
|
||||
-- | Note: This results in a zigzag encoded number. Avro does not have
|
||||
-- unsigned types.
|
||||
word16 :: Word16 -> Builder
|
||||
word16 = B.int32LEB128 . fromIntegral
|
||||
|
||||
-- | Note: This results in a zigzag encoded number. Avro does not have
|
||||
-- unsigned types.
|
||||
word32 :: Word32 -> Builder
|
||||
word32 = B.int64LEB128 . fromIntegral
|
||||
|
||||
-- | Note: This results in a @fixed@ encoded value of length 16. In the
|
||||
-- schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@.
|
||||
-- A big-endian encoding is used.
|
||||
word128 :: Word128 -> Builder
|
||||
word128 = B.word128BE
|
||||
|
||||
bytes :: Bytes -> Builder
|
||||
bytes !b = int (Bytes.length b) <> B.bytes b
|
||||
|
||||
chunks :: Chunks -> Builder
|
||||
chunks !b = int (Chunks.length b) <> B.chunks b
|
||||
|
||||
text :: Text -> Builder
|
||||
text = bytes . Utf8.fromText
|
||||
|
||||
-- | Encode a map with exactly two key-value pairs. The keys are text.
|
||||
-- This is commonly used to encode the header in an Avro file, which has
|
||||
-- a map with two keys: @avro.schema@ and @avro.codec@.
|
||||
map2 ::
|
||||
Text -- ^ First key
|
||||
-> Builder -- ^ First value (already encoded)
|
||||
-> Text -- ^ Second key
|
||||
-> Builder -- ^ Second value (already encoded)
|
||||
-> Builder
|
||||
{-# inline map2 #-}
|
||||
map2 k1 v1 k2 v2 = B.word8 0x04 <> text k1 <> v1 <> text k2 <> v2 <> B.word8 0x00
|
|
@ -74,6 +74,7 @@ module Data.Bytes.Builder.Bounded
|
|||
, char
|
||||
-- ** Native
|
||||
, wordPaddedDec2
|
||||
, wordPaddedDec3
|
||||
, wordPaddedDec4
|
||||
, wordPaddedDec9
|
||||
-- ** Machine-Readable
|
||||
|
@ -102,6 +103,7 @@ module Data.Bytes.Builder.Bounded
|
|||
-- with the high bit of each output byte set to 1 in all bytes except for
|
||||
-- the final byte.
|
||||
, wordLEB128
|
||||
, word16LEB128
|
||||
, word32LEB128
|
||||
, word64LEB128
|
||||
-- **** VLQ
|
||||
|
@ -788,6 +790,14 @@ wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do
|
|||
) arr (off + 3) w
|
||||
pure (off + 4)
|
||||
|
||||
wordPaddedDec3 :: Word -> Builder 3
|
||||
wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do
|
||||
putRem10
|
||||
(putRem10 $ putRem10
|
||||
(\_ _ _ -> pure ())
|
||||
) arr (off + 2) w
|
||||
pure (off + 3)
|
||||
|
||||
-- | Encode a number less than 1e9 as a decimal number, zero-padding it to
|
||||
-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as
|
||||
-- @000000005@.
|
||||
|
@ -923,6 +933,11 @@ wordLEB128 :: Word -> Builder 10
|
|||
{-# inline wordLEB128 #-}
|
||||
wordLEB128 (W# w) = lebCommon (W# w)
|
||||
|
||||
-- | Encode a 16-bit word with LEB-128.
|
||||
word16LEB128 :: Word16 -> Builder 3
|
||||
{-# inline word16LEB128 #-}
|
||||
word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w))
|
||||
|
||||
-- | Encode a 32-bit word with LEB-128.
|
||||
word32LEB128 :: Word32 -> Builder 5
|
||||
{-# inline word32LEB128 #-}
|
||||
|
|
Loading…
Reference in a new issue