Add Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#
This commit is contained in:
parent
7c86ace3d0
commit
0fcd93a9aa
4 changed files with 93 additions and 41 deletions
|
@ -5,6 +5,11 @@ Note: Prior to version 0.3.4.0, this library was named
|
||||||
`small-bytearray-builder` is now just a compatibility shim
|
`small-bytearray-builder` is now just a compatibility shim
|
||||||
to ease the migration process.
|
to ease the migration process.
|
||||||
|
|
||||||
|
## 0.3.15.0 -- 2023-??-??
|
||||||
|
|
||||||
|
* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need
|
||||||
|
to perform JSON string encoding without using a builder.
|
||||||
|
|
||||||
## 0.3.14.0 -- 2023-07-20
|
## 0.3.14.0 -- 2023-07-20
|
||||||
|
|
||||||
* Add `runOntoLength`.
|
* Add `runOntoLength`.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
cabal-version: 2.2
|
cabal-version: 2.2
|
||||||
name: bytebuild
|
name: bytebuild
|
||||||
version: 0.3.14.0
|
version: 0.3.15.0
|
||||||
synopsis: Build byte arrays
|
synopsis: Build byte arrays
|
||||||
description:
|
description:
|
||||||
This is similar to the builder facilities provided by
|
This is similar to the builder facilities provided by
|
||||||
|
|
|
@ -159,10 +159,10 @@ import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable))
|
||||||
import Data.Bytes.Builder.Unsafe (commitsOntoChunks)
|
import Data.Bytes.Builder.Unsafe (commitsOntoChunks)
|
||||||
import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks)
|
import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks)
|
||||||
import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect)
|
import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect)
|
||||||
|
import Data.Bytes.Builder.Unsafe (pasteUtf8TextJson#)
|
||||||
import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil))
|
import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil))
|
||||||
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
||||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||||
import Data.Char (ord)
|
|
||||||
import Data.Foldable (foldlM)
|
import Data.Foldable (foldlM)
|
||||||
import Data.Int (Int64,Int32,Int16,Int8)
|
import Data.Int (Int64,Int32,Int16,Int8)
|
||||||
import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..))
|
import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..))
|
||||||
|
@ -173,7 +173,7 @@ import Data.Word.Zigzag (toZigzagNative,toZigzag32,toZigzag64)
|
||||||
import Foreign.C.String (CStringLen)
|
import Foreign.C.String (CStringLen)
|
||||||
import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder)
|
import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder)
|
||||||
import GHC.Exts (MutableByteArray#,Addr#,(*#),oneShot)
|
import GHC.Exts (MutableByteArray#,Addr#,(*#),oneShot)
|
||||||
import GHC.Exts (Int(I#),Char(C#),Int#,State#,ByteArray#,(>=#))
|
import GHC.Exts (Int(I#),Int#,State#,ByteArray#,(>=#))
|
||||||
import GHC.Exts (RealWorld,(+#),(-#),(<#))
|
import GHC.Exts (RealWorld,(+#),(-#),(<#))
|
||||||
import GHC.Integer.Logarithms.Compat (integerLog2#)
|
import GHC.Integer.Logarithms.Compat (integerLog2#)
|
||||||
import GHC.IO (IO(IO),stToIO)
|
import GHC.IO (IO(IO),stToIO)
|
||||||
|
@ -795,38 +795,18 @@ asWord8s (PrimArray x) = PrimArray x
|
||||||
-- byte sequence is UTF-8 encoded text.
|
-- byte sequence is UTF-8 encoded text.
|
||||||
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder
|
||||||
{-# noinline slicedUtf8TextJson #-}
|
{-# noinline slicedUtf8TextJson #-}
|
||||||
slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction reqLen $ \dst doff0 -> do
|
slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction# reqLen#
|
||||||
PM.writeByteArray dst doff0 (c2w '"')
|
( \dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# )
|
||||||
let go !soff !slen !doff = if slen > 0
|
|
||||||
then case indexChar8Array (ByteArray src#) soff of
|
|
||||||
'\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2)
|
|
||||||
'\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2)
|
|
||||||
c -> if c >= '\x20'
|
|
||||||
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
|
||||||
else case c of
|
|
||||||
'\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2)
|
|
||||||
'\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2)
|
|
||||||
'\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2)
|
|
||||||
_ -> do
|
|
||||||
write2 dst doff '\\' 'u'
|
|
||||||
doff' <- UnsafeBounded.pasteST
|
|
||||||
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
|
|
||||||
dst (doff + 2)
|
|
||||||
go (soff + 1) (slen - 1) doff'
|
|
||||||
else pure doff
|
|
||||||
doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
|
|
||||||
PM.writeByteArray dst doffRes (c2w '"')
|
|
||||||
pure (doffRes + 1)
|
|
||||||
where
|
where
|
||||||
slen0 = I# slen0#
|
|
||||||
-- We multiply by 6 because, in the worst case, everything might be in the
|
-- We multiply by 6 because, in the worst case, everything might be in the
|
||||||
-- unprintable ASCII range. The plus 2 is for the quotes on the ends.
|
-- unprintable ASCII range. The plus 2 is for the quotes on the ends.
|
||||||
reqLen = (6 * slen0) + 2
|
!reqLen# = (6# *# slen0# ) +# 2#
|
||||||
|
|
||||||
-- | Constructor for 'Builder' that works on a function with lifted
|
-- | Constructor for 'Builder' that works on a function with lifted
|
||||||
-- arguments instead of unlifted ones. This is just as unsafe as the
|
-- arguments instead of unlifted ones. This is just as unsafe as the
|
||||||
-- actual constructor.
|
-- actual constructor.
|
||||||
fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder
|
fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder
|
||||||
|
{-# inline fromFunction #-}
|
||||||
fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||||
1# -> (# s0, buf0, off0, len0, cs0 #)
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
@ -837,11 +817,17 @@ fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
|
in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
|
||||||
(# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
(# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||||
|
|
||||||
-- Internal. Write two characters in the ASCII plane to a byte array.
|
fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) ) -> Builder
|
||||||
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
|
{-# inline fromFunction# #-}
|
||||||
write2 marr ix a b = do
|
fromFunction# req f = Builder $ \buf0 off0 len0 cs0 s0 ->
|
||||||
PM.writeByteArray marr ix (c2w a)
|
let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
|
||||||
PM.writeByteArray marr (ix + 1) (c2w b)
|
1# -> (# s0, buf0, off0, len0, cs0 #)
|
||||||
|
_ -> let !(I# lenX) = max 4080 (I# req) in
|
||||||
|
case Exts.newByteArray# lenX s0 of
|
||||||
|
(# sX, bufX #) ->
|
||||||
|
(# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
|
||||||
|
in case f buf1 off1 s1 of
|
||||||
|
(# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
|
||||||
|
|
||||||
-- | Create a builder from text. The text will be UTF-8 encoded.
|
-- | Create a builder from text. The text will be UTF-8 encoded.
|
||||||
shortTextUtf8 :: ShortText -> Builder
|
shortTextUtf8 :: ShortText -> Builder
|
||||||
|
@ -864,6 +850,7 @@ textUtf8 (I.Text (A.ByteArray b) off len) =
|
||||||
-- * @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes)
|
-- * @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes)
|
||||||
-- * @hello\<ESC\>world ==> "hello\\u001Bworld"@ (where @\<ESC\>@ is code point 0x1B)
|
-- * @hello\<ESC\>world ==> "hello\\u001Bworld"@ (where @\<ESC\>@ is code point 0x1B)
|
||||||
shortTextJsonString :: ShortText -> Builder
|
shortTextJsonString :: ShortText -> Builder
|
||||||
|
{-# inline shortTextJsonString #-}
|
||||||
shortTextJsonString a =
|
shortTextJsonString a =
|
||||||
let !(ByteArray ba) = shortTextToByteArray a
|
let !(ByteArray ba) = shortTextToByteArray a
|
||||||
!(I# len) = PM.sizeofByteArray (ByteArray ba)
|
!(I# len) = PM.sizeofByteArray (ByteArray ba)
|
||||||
|
@ -1204,23 +1191,20 @@ shortTextToByteArray :: ShortText -> ByteArray
|
||||||
shortTextToByteArray x = case TS.toShortByteString x of
|
shortTextToByteArray x = case TS.toShortByteString x of
|
||||||
SBS a -> ByteArray a
|
SBS a -> ByteArray a
|
||||||
|
|
||||||
indexChar8Array :: ByteArray -> Int -> Char
|
|
||||||
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
|
||||||
|
|
||||||
c2w :: Char -> Word8
|
|
||||||
c2w = fromIntegral . ord
|
|
||||||
|
|
||||||
-- | Encode a signed machine-sized integer with LEB-128. This uses
|
-- | Encode a signed machine-sized integer with LEB-128. This uses
|
||||||
-- zig-zag encoding.
|
-- zig-zag encoding.
|
||||||
intLEB128 :: Int -> Builder
|
intLEB128 :: Int -> Builder
|
||||||
|
{-# inline intLEB128 #-}
|
||||||
intLEB128 = wordLEB128 . toZigzagNative
|
intLEB128 = wordLEB128 . toZigzagNative
|
||||||
|
|
||||||
-- | Encode a 32-bit signed integer with LEB-128. This uses zig-zag encoding.
|
-- | Encode a 32-bit signed integer with LEB-128. This uses zig-zag encoding.
|
||||||
int32LEB128 :: Int32 -> Builder
|
int32LEB128 :: Int32 -> Builder
|
||||||
|
{-# inline int32LEB128 #-}
|
||||||
int32LEB128 = word32LEB128 . toZigzag32
|
int32LEB128 = word32LEB128 . toZigzag32
|
||||||
|
|
||||||
-- | Encode a 64-bit signed integer with LEB-128. This uses zig-zag encoding.
|
-- | Encode a 64-bit signed integer with LEB-128. This uses zig-zag encoding.
|
||||||
int64LEB128 :: Int64 -> Builder
|
int64LEB128 :: Int64 -> Builder
|
||||||
|
{-# inline int64LEB128 #-}
|
||||||
int64LEB128 = word64LEB128 . toZigzag64
|
int64LEB128 = word64LEB128 . toZigzag64
|
||||||
|
|
||||||
-- | Encode a machine-sized word with LEB-128.
|
-- | Encode a machine-sized word with LEB-128.
|
||||||
|
|
|
@ -33,22 +33,25 @@ module Data.Bytes.Builder.Unsafe
|
||||||
-- @Data.Bytes.Builder@ instead.
|
-- @Data.Bytes.Builder@ instead.
|
||||||
, stringUtf8
|
, stringUtf8
|
||||||
, cstring
|
, cstring
|
||||||
|
-- * Pasting with Preconditions
|
||||||
|
, pasteUtf8TextJson#
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.Primitive (primitive_)
|
import Control.Monad.Primitive (primitive_)
|
||||||
import Data.Bytes.Chunks (Chunks(ChunksCons))
|
import Data.Bytes.Chunks (Chunks(ChunksCons))
|
||||||
import Data.Bytes.Types (Bytes(Bytes))
|
import Data.Bytes.Types (Bytes(Bytes))
|
||||||
|
import Data.Char (ord)
|
||||||
import Data.Primitive (MutableByteArray(..),ByteArray(..))
|
import Data.Primitive (MutableByteArray(..),ByteArray(..))
|
||||||
|
import Data.Word (Word8)
|
||||||
import Foreign.C.String (CString)
|
import Foreign.C.String (CString)
|
||||||
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
||||||
import GHC.Exts ((-#),(+#),(>#),(>=#))
|
import GHC.Exts ((-#),(+#),(>#),(>=#),Char(C#))
|
||||||
import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr))
|
import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr))
|
||||||
import GHC.Exts (RealWorld,IsString,Int#,State#)
|
import GHC.Exts (RealWorld,IsString,Int#,State#)
|
||||||
import GHC.ST (ST(ST))
|
|
||||||
import GHC.IO (stToIO)
|
import GHC.IO (stToIO)
|
||||||
|
import GHC.ST (ST(ST))
|
||||||
|
|
||||||
import qualified Compat as C
|
import qualified Compat as C
|
||||||
|
|
||||||
import qualified Data.Bytes.Builder.Bounded as Bounded
|
import qualified Data.Bytes.Builder.Bounded as Bounded
|
||||||
import qualified Data.Bytes.Builder.Bounded.Unsafe as UnsafeBounded
|
import qualified Data.Bytes.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
|
@ -311,3 +314,63 @@ commitDistance target !n (Mutable buf len cs) =
|
||||||
case Exts.sameMutableByteArray# target buf of
|
case Exts.sameMutableByteArray# target buf of
|
||||||
1# -> n +# len
|
1# -> n +# len
|
||||||
_ -> commitDistance target (n +# len) cs
|
_ -> commitDistance target (n +# len) cs
|
||||||
|
|
||||||
|
-- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes.
|
||||||
|
-- This escapes all characters with code points below @0x20@.
|
||||||
|
--
|
||||||
|
-- * Precondition: The slice of the byte argument is UTF-8 encoded text.
|
||||||
|
-- * Precondition: There is enough space in the buffer for the result
|
||||||
|
-- to be written to. A simple way to ensure enough space is to allocate
|
||||||
|
-- @3N + 2@ bytes, where N is the length of the argument. However, the
|
||||||
|
-- caller may use clever heuristics to find a lower upper bound.
|
||||||
|
-- * Result: The next offset in the destination buffer
|
||||||
|
pasteUtf8TextJson# ::
|
||||||
|
ByteArray# -- ^ source
|
||||||
|
-> Int# -- ^ source offset
|
||||||
|
-> Int# -- ^ source length
|
||||||
|
-> MutableByteArray# s -- ^ destination buffer
|
||||||
|
-> Int# -- ^ offset into destination buffer
|
||||||
|
-> State# s -- ^ state token
|
||||||
|
-> (# State# s, Int# #) -- returns next destination offset
|
||||||
|
{-# noinline pasteUtf8TextJson# #-}
|
||||||
|
pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# =
|
||||||
|
let ST f = do
|
||||||
|
let dst = MutableByteArray dst#
|
||||||
|
let doff0 = I# doff0#
|
||||||
|
PM.writeByteArray dst doff0 (c2w '"')
|
||||||
|
let go !soff !slen !doff = if slen > 0
|
||||||
|
then case indexChar8Array (ByteArray src#) soff of
|
||||||
|
'\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2)
|
||||||
|
'\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2)
|
||||||
|
c -> if c >= '\x20'
|
||||||
|
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
||||||
|
else case c of
|
||||||
|
'\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2)
|
||||||
|
'\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2)
|
||||||
|
'\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2)
|
||||||
|
_ -> do
|
||||||
|
write2 dst doff '\\' 'u'
|
||||||
|
doff' <- UnsafeBounded.pasteST
|
||||||
|
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
|
||||||
|
dst (doff + 2)
|
||||||
|
go (soff + 1) (slen - 1) doff'
|
||||||
|
else pure doff
|
||||||
|
doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1)
|
||||||
|
PM.writeByteArray dst doffRes (c2w '"')
|
||||||
|
pure (doffRes + 1)
|
||||||
|
!(# !s1, I# dstFinal #) = f s0#
|
||||||
|
in (# s1, dstFinal #)
|
||||||
|
|
||||||
|
c2w :: Char -> Word8
|
||||||
|
{-# inline c2w #-}
|
||||||
|
c2w = fromIntegral . ord
|
||||||
|
|
||||||
|
-- Internal. Write two characters in the ASCII plane to a byte array.
|
||||||
|
write2 :: MutableByteArray s -> Int -> Char -> Char -> ST s ()
|
||||||
|
write2 marr ix a b = do
|
||||||
|
PM.writeByteArray marr ix (c2w a)
|
||||||
|
PM.writeByteArray marr (ix + 1) (c2w b)
|
||||||
|
|
||||||
|
indexChar8Array :: ByteArray -> Int -> Char
|
||||||
|
{-# inline indexChar8Array #-}
|
||||||
|
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
||||||
|
|
Loading…
Reference in a new issue