commit
6217c8b8e7
9 changed files with 405 additions and 157 deletions
|
@ -1,8 +1,10 @@
|
||||||
|
import Data.Primitive (ByteArray)
|
||||||
|
import Data.Word (Word64)
|
||||||
import Gauge (bgroup,bench,whnf)
|
import Gauge (bgroup,bench,whnf)
|
||||||
import Gauge.Main (defaultMain)
|
import Gauge.Main (defaultMain)
|
||||||
import Data.Word (Word64)
|
|
||||||
import Data.Primitive (ByteArray)
|
import qualified Arithmetic.Nat as Nat
|
||||||
import qualified Data.ByteArray.Builder.Small.Unsafe as U
|
import qualified Data.ByteArray.Builder.Bounded as U
|
||||||
|
|
||||||
import qualified HexWord64
|
import qualified HexWord64
|
||||||
|
|
||||||
|
@ -33,7 +35,7 @@ data Word64s = Word64s
|
||||||
|
|
||||||
encodeHexWord64s :: Word64s -> ByteArray
|
encodeHexWord64s :: Word64s -> ByteArray
|
||||||
{-# noinline encodeHexWord64s #-}
|
{-# noinline encodeHexWord64s #-}
|
||||||
encodeHexWord64s (Word64s a b c d e f g h) = U.run $
|
encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $
|
||||||
U.word64PaddedUpperHex a `U.append`
|
U.word64PaddedUpperHex a `U.append`
|
||||||
U.word64PaddedUpperHex b `U.append`
|
U.word64PaddedUpperHex b `U.append`
|
||||||
U.word64PaddedUpperHex c `U.append`
|
U.word64PaddedUpperHex c `U.append`
|
||||||
|
@ -45,7 +47,7 @@ encodeHexWord64s (Word64s a b c d e f g h) = U.run $
|
||||||
|
|
||||||
encodeHexWord64sLoop :: Word64s -> ByteArray
|
encodeHexWord64sLoop :: Word64s -> ByteArray
|
||||||
{-# noinline encodeHexWord64sLoop #-}
|
{-# noinline encodeHexWord64sLoop #-}
|
||||||
encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run $
|
encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $
|
||||||
HexWord64.word64PaddedUpperHex a `U.append`
|
HexWord64.word64PaddedUpperHex a `U.append`
|
||||||
HexWord64.word64PaddedUpperHex b `U.append`
|
HexWord64.word64PaddedUpperHex b `U.append`
|
||||||
HexWord64.word64PaddedUpperHex c `U.append`
|
HexWord64.word64PaddedUpperHex c `U.append`
|
||||||
|
|
5
cabal.project
Normal file
5
cabal.project
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
packages: .
|
||||||
|
source-repository-package
|
||||||
|
type: git
|
||||||
|
location: https://github.com/andrewthad/natural-arithmetic
|
||||||
|
tag: 68868c96b58ddaf71bb865b247d2c14c3668f4c2
|
|
@ -17,7 +17,7 @@ module HexWord64
|
||||||
|
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
import Data.ByteArray.Builder.Small.Unsafe (Builder,construct)
|
import Data.ByteArray.Builder.Bounded.Unsafe (Builder,construct)
|
||||||
import Data.Primitive
|
import Data.Primitive
|
||||||
import Data.Word
|
import Data.Word
|
||||||
import GHC.Exts
|
import GHC.Exts
|
||||||
|
|
|
@ -36,8 +36,10 @@ flag checked
|
||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Data.ByteArray.Builder.Small
|
Data.ByteArray.Builder
|
||||||
Data.ByteArray.Builder.Small.Unsafe
|
Data.ByteArray.Builder.Unsafe
|
||||||
|
Data.ByteArray.Builder.Bounded
|
||||||
|
Data.ByteArray.Builder.Bounded.Unsafe
|
||||||
build-depends:
|
build-depends:
|
||||||
, base >=4.12.0.0 && <5
|
, base >=4.12.0.0 && <5
|
||||||
, byteslice >=0.1 && <0.2
|
, byteslice >=0.1 && <0.2
|
||||||
|
@ -46,6 +48,7 @@ library
|
||||||
, vector >=0.12.0.3 && <0.13
|
, vector >=0.12.0.3 && <0.13
|
||||||
, bytestring >=0.10.8.2 && <0.11
|
, bytestring >=0.10.8.2 && <0.11
|
||||||
, text-short >=0.1.3 && <0.2
|
, text-short >=0.1.3 && <0.2
|
||||||
|
, natural-arithmetic >=0.1 && <0.2
|
||||||
if flag(checked)
|
if flag(checked)
|
||||||
build-depends: primitive-checked >= 0.7 && <0.8
|
build-depends: primitive-checked >= 0.7 && <0.8
|
||||||
else
|
else
|
||||||
|
@ -59,18 +62,21 @@ test-suite test
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
hs-source-dirs: test, common
|
hs-source-dirs: test, common
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
|
ghc-options: -O2 -Wall
|
||||||
other-modules:
|
other-modules:
|
||||||
HexWord64
|
HexWord64
|
||||||
build-depends:
|
build-depends:
|
||||||
|
, QuickCheck >=2.13.1 && <2.14
|
||||||
, base >=4.12.0.0 && <5
|
, base >=4.12.0.0 && <5
|
||||||
, byteslice
|
, byteslice
|
||||||
, bytestring
|
, bytestring
|
||||||
, small-bytearray-builder
|
, natural-arithmetic
|
||||||
, QuickCheck >=2.13.1 && <2.14
|
|
||||||
, tasty-quickcheck >=0.10.1 && <0.11
|
|
||||||
, tasty-hunit >=0.10.0.2 && <0.11
|
|
||||||
, tasty >=1.2.3 && <1.3
|
|
||||||
, primitive
|
, primitive
|
||||||
|
, small-bytearray-builder
|
||||||
|
, tasty >=1.2.3 && <1.3
|
||||||
|
, tasty-hunit >=0.10.0.2 && <0.11
|
||||||
|
, tasty-quickcheck >=0.10.1 && <0.11
|
||||||
|
, text >=1.2 && <1.3
|
||||||
, vector
|
, vector
|
||||||
|
|
||||||
benchmark bench
|
benchmark bench
|
||||||
|
@ -78,6 +84,7 @@ benchmark bench
|
||||||
build-depends:
|
build-depends:
|
||||||
, base
|
, base
|
||||||
, gauge >= 0.2.4
|
, gauge >= 0.2.4
|
||||||
|
, natural-arithmetic
|
||||||
, primitive
|
, primitive
|
||||||
, small-bytearray-builder
|
, small-bytearray-builder
|
||||||
ghc-options: -Wall -O2
|
ghc-options: -Wall -O2
|
||||||
|
|
|
@ -6,11 +6,11 @@
|
||||||
{-# language ScopedTypeVariables #-}
|
{-# language ScopedTypeVariables #-}
|
||||||
{-# language UnboxedTuples #-}
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
module Data.ByteArray.Builder.Small
|
module Data.ByteArray.Builder
|
||||||
( -- * Unsafe Primitives
|
( -- * Bounded Primitives
|
||||||
Builder(..)
|
Builder(..)
|
||||||
, construct
|
, construct
|
||||||
, fromUnsafe
|
, fromBounded
|
||||||
-- * Evaluation
|
-- * Evaluation
|
||||||
, run
|
, run
|
||||||
, pasteST
|
, pasteST
|
||||||
|
@ -24,6 +24,8 @@ module Data.ByteArray.Builder.Small
|
||||||
, bytearray
|
, bytearray
|
||||||
, shortTextUtf8
|
, shortTextUtf8
|
||||||
, shortTextJsonString
|
, shortTextJsonString
|
||||||
|
, cstring
|
||||||
|
, stringUtf8
|
||||||
-- * Encode Integral Types
|
-- * Encode Integral Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, word64Dec
|
, word64Dec
|
||||||
|
@ -34,10 +36,13 @@ module Data.ByteArray.Builder.Small
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
, word16PaddedUpperHex
|
, word16PaddedUpperHex
|
||||||
, word8PaddedUpperHex
|
, word8PaddedUpperHex
|
||||||
|
, ascii
|
||||||
|
, char
|
||||||
-- ** Machine-Readable
|
-- ** Machine-Readable
|
||||||
, word64BE
|
, word64BE
|
||||||
, word32BE
|
, word32BE
|
||||||
, word16BE
|
, word16BE
|
||||||
|
, word8
|
||||||
-- * Encode Floating-Point Types
|
-- * Encode Floating-Point Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, doubleDec
|
, doubleDec
|
||||||
|
@ -46,43 +51,27 @@ module Data.ByteArray.Builder.Small
|
||||||
import Control.Monad.Primitive
|
import Control.Monad.Primitive
|
||||||
import Control.Monad.ST
|
import Control.Monad.ST
|
||||||
import Control.Monad.ST.Run (runByteArrayST)
|
import Control.Monad.ST.Run (runByteArrayST)
|
||||||
import Data.Bytes.Types
|
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
|
||||||
import Data.Primitive
|
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
|
||||||
import Data.Int (Int64)
|
|
||||||
import GHC.Exts
|
|
||||||
import GHC.ST
|
|
||||||
import GHC.Word
|
|
||||||
import GHC.TypeLits (KnownNat,natVal')
|
|
||||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
|
||||||
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
import Data.ByteString.Short.Internal (ShortByteString(SBS))
|
||||||
import Data.Text.Short (ShortText)
|
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
|
import Data.Int (Int64)
|
||||||
|
import Data.Primitive
|
||||||
|
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||||
|
import Data.Text.Short (ShortText)
|
||||||
|
import GHC.Exts
|
||||||
|
import GHC.ST (ST(ST))
|
||||||
|
import GHC.Word
|
||||||
|
|
||||||
|
import qualified Arithmetic.Nat as Nat
|
||||||
|
import qualified Arithmetic.Types as Arithmetic
|
||||||
import qualified GHC.Exts as Exts
|
import qualified GHC.Exts as Exts
|
||||||
import qualified Data.Text.Short as TS
|
import qualified Data.Text.Short as TS
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
|
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
||||||
|
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
-- | An unmaterialized sequence of bytes that may be pasted
|
|
||||||
-- into a mutable byte array.
|
|
||||||
newtype Builder = Builder
|
|
||||||
-- This functions takes an offset and a number of remaining bytes
|
|
||||||
-- and returns the new offset.
|
|
||||||
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
|
|
||||||
|
|
||||||
instance Semigroup Builder where
|
|
||||||
{-# inline (<>) #-}
|
|
||||||
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
|
|
||||||
(# s1, r #) -> case r /=# (-1#) of
|
|
||||||
1# -> g arr r (len0 +# (off0 -# r)) s1
|
|
||||||
_ -> (# s1, (-1#) #)
|
|
||||||
|
|
||||||
instance Monoid Builder where
|
|
||||||
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
|
||||||
|
|
||||||
instance IsString Builder where
|
|
||||||
fromString = shortTextUtf8 . TS.fromString
|
|
||||||
|
|
||||||
-- | Run a builder. An accurate size hint is important for good performance.
|
-- | Run a builder. An accurate size hint is important for good performance.
|
||||||
-- The size hint should be slightly larger than the actual size.
|
-- The size hint should be slightly larger than the actual size.
|
||||||
|
@ -98,7 +87,7 @@ run hint b = runByteArrayST $ do
|
||||||
Just len -> do
|
Just len -> do
|
||||||
shrinkMutableByteArray arr len
|
shrinkMutableByteArray arr len
|
||||||
unsafeFreezeByteArray arr
|
unsafeFreezeByteArray arr
|
||||||
go hint
|
go (max hint 1)
|
||||||
|
|
||||||
-- | Variant of 'pasteArrayST' that runs in 'IO'.
|
-- | Variant of 'pasteArrayST' that runs in 'IO'.
|
||||||
pasteArrayIO ::
|
pasteArrayIO ::
|
||||||
|
@ -183,13 +172,19 @@ construct f = Builder
|
||||||
Nothing -> (# s1, (-1#) #)
|
Nothing -> (# s1, (-1#) #)
|
||||||
Just (I# n) -> (# s1, n #)
|
Just (I# n) -> (# s1, n #)
|
||||||
|
|
||||||
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
|
-- | Convert a bounded builder to an unbounded one. If the size
|
||||||
{-# inline fromUnsafe #-}
|
-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
|
||||||
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
|
-- to let GHC conjure up this value for you.
|
||||||
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
|
fromBounded ::
|
||||||
I# req -> case len >=# req of
|
Arithmetic.Nat n
|
||||||
1# -> f arr off s0
|
-> Bounded.Builder n
|
||||||
_ -> (# s0, (-1#) #)
|
-> Builder
|
||||||
|
{-# inline fromBounded #-}
|
||||||
|
fromBounded n (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
|
||||||
|
let !(I# req) = Nat.demote n in
|
||||||
|
case len >=# req of
|
||||||
|
1# -> f arr off s0
|
||||||
|
_ -> (# s0, (-1#) #)
|
||||||
|
|
||||||
-- | Create a builder from an unsliced byte sequence.
|
-- | Create a builder from an unsliced byte sequence.
|
||||||
bytearray :: ByteArray -> Builder
|
bytearray :: ByteArray -> Builder
|
||||||
|
@ -223,8 +218,8 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
|
||||||
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
|
||||||
else do
|
else do
|
||||||
write2 dst doff '\\' 'u'
|
write2 dst doff '\\' 'u'
|
||||||
doff' <- Unsafe.pasteST
|
doff' <- UnsafeBounded.pasteST
|
||||||
(Unsafe.word16PaddedUpperHex (fromIntegral (c2w c)))
|
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
|
||||||
dst (doff + 2)
|
dst (doff + 2)
|
||||||
go (soff + 1) (slen - 1) doff'
|
go (soff + 1) (slen - 1) doff'
|
||||||
else pure doff
|
else pure doff
|
||||||
|
@ -262,61 +257,70 @@ shortTextJsonString a =
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word64Dec :: Word64 -> Builder
|
word64Dec :: Word64 -> Builder
|
||||||
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
|
word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w)
|
||||||
|
|
||||||
-- | Encodes an unsigned 16-bit integer as decimal.
|
-- | Encodes an unsigned 16-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word32Dec :: Word32 -> Builder
|
word32Dec :: Word32 -> Builder
|
||||||
word32Dec w = fromUnsafe (Unsafe.word32Dec w)
|
word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w)
|
||||||
|
|
||||||
-- | Encodes an unsigned 16-bit integer as decimal.
|
-- | Encodes an unsigned 16-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the
|
-- This encoding never starts with a zero unless the
|
||||||
-- argument was zero.
|
-- argument was zero.
|
||||||
word16Dec :: Word16 -> Builder
|
word16Dec :: Word16 -> Builder
|
||||||
word16Dec w = fromUnsafe (Unsafe.word16Dec w)
|
word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w)
|
||||||
|
|
||||||
-- | Encode a double-floating-point number, using decimal notation or
|
-- | Encode a double-floating-point number, using decimal notation or
|
||||||
-- scientific notation depending on the magnitude. This has undefined
|
-- scientific notation depending on the magnitude. This has undefined
|
||||||
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
||||||
-- crash, but the generated numbers will be nonsense.
|
-- crash, but the generated numbers will be nonsense.
|
||||||
doubleDec :: Double -> Builder
|
doubleDec :: Double -> Builder
|
||||||
doubleDec w = fromUnsafe (Unsafe.doubleDec w)
|
doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w)
|
||||||
|
|
||||||
-- | Encodes a signed 64-bit integer as decimal.
|
-- | Encodes a signed 64-bit integer as decimal.
|
||||||
-- This encoding never starts with a zero unless the argument was zero.
|
-- This encoding never starts with a zero unless the argument was zero.
|
||||||
-- Negative numbers are preceded by a minus sign. Positive numbers
|
-- Negative numbers are preceded by a minus sign. Positive numbers
|
||||||
-- are not preceded by anything.
|
-- are not preceded by anything.
|
||||||
int64Dec :: Int64 -> Builder
|
int64Dec :: Int64 -> Builder
|
||||||
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
|
int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w)
|
||||||
|
|
||||||
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
-- the encoding to 16 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
|
||||||
word64PaddedUpperHex :: Word64 -> Builder
|
word64PaddedUpperHex :: Word64 -> Builder
|
||||||
word64PaddedUpperHex w =
|
word64PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word64PaddedUpperHex w)
|
fromBounded Nat.constant (Bounded.word64PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 8 digits. This uses uppercase for the alphabetical
|
-- the encoding to 8 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @000003FE@.
|
-- digits. For example, this encodes the number 1022 as @000003FE@.
|
||||||
word32PaddedUpperHex :: Word32 -> Builder
|
word32PaddedUpperHex :: Word32 -> Builder
|
||||||
word32PaddedUpperHex w =
|
word32PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word32PaddedUpperHex w)
|
fromBounded Nat.constant (Bounded.word32PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 4 digits. This uses uppercase for the alphabetical
|
-- the encoding to 4 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 1022 as @03FE@.
|
-- digits. For example, this encodes the number 1022 as @03FE@.
|
||||||
word16PaddedUpperHex :: Word16 -> Builder
|
word16PaddedUpperHex :: Word16 -> Builder
|
||||||
word16PaddedUpperHex w =
|
word16PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word16PaddedUpperHex w)
|
fromBounded Nat.constant (Bounded.word16PaddedUpperHex w)
|
||||||
|
|
||||||
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
|
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
|
||||||
-- the encoding to 2 digits. This uses uppercase for the alphabetical
|
-- the encoding to 2 digits. This uses uppercase for the alphabetical
|
||||||
-- digits. For example, this encodes the number 11 as @0B@.
|
-- digits. For example, this encodes the number 11 as @0B@.
|
||||||
word8PaddedUpperHex :: Word8 -> Builder
|
word8PaddedUpperHex :: Word8 -> Builder
|
||||||
word8PaddedUpperHex w =
|
word8PaddedUpperHex w =
|
||||||
fromUnsafe (Unsafe.word8PaddedUpperHex w)
|
fromBounded Nat.constant (Bounded.word8PaddedUpperHex w)
|
||||||
|
|
||||||
|
-- | Encode an ASCII char.
|
||||||
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
|
ascii :: Char -> Builder
|
||||||
|
ascii c = fromBounded Nat.constant (Bounded.char c)
|
||||||
|
|
||||||
|
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||||
|
char :: Char -> Builder
|
||||||
|
char c = fromBounded Nat.constant (Bounded.char c)
|
||||||
|
|
||||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||||
unST (ST f) = f
|
unST (ST f) = f
|
||||||
|
@ -328,17 +332,20 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||||
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word64BE :: Word64 -> Builder
|
word64BE :: Word64 -> Builder
|
||||||
word64BE w = fromUnsafe (Unsafe.word64BE w)
|
word64BE w = fromBounded Nat.constant (Bounded.word64BE w)
|
||||||
|
|
||||||
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word32BE :: Word32 -> Builder
|
word32BE :: Word32 -> Builder
|
||||||
word32BE w = fromUnsafe (Unsafe.word32BE w)
|
word32BE w = fromBounded Nat.constant (Bounded.word32BE w)
|
||||||
|
|
||||||
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word16BE :: Word16 -> Builder
|
word16BE :: Word16 -> Builder
|
||||||
word16BE w = fromUnsafe (Unsafe.word16BE w)
|
word16BE w = fromBounded Nat.constant (Bounded.word16BE w)
|
||||||
|
|
||||||
|
word8 :: Word8 -> Builder
|
||||||
|
word8 w = fromBounded Nat.constant (Bounded.word8 w)
|
||||||
|
|
||||||
-- ShortText is already UTF-8 encoded. This is a no-op.
|
-- ShortText is already UTF-8 encoded. This is a no-op.
|
||||||
shortTextToByteArray :: ShortText -> ByteArray
|
shortTextToByteArray :: ShortText -> ByteArray
|
||||||
|
@ -349,4 +356,4 @@ indexChar8Array :: ByteArray -> Int -> Char
|
||||||
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
|
||||||
|
|
||||||
c2w :: Char -> Word8
|
c2w :: Char -> Word8
|
||||||
c2w = fromIntegral . ord
|
c2w = fromIntegral . ord
|
|
@ -1,28 +1,28 @@
|
||||||
{-# language GADTSyntax #-}
|
|
||||||
{-# language KindSignatures #-}
|
|
||||||
{-# language ScopedTypeVariables #-}
|
|
||||||
{-# language BangPatterns #-}
|
{-# language BangPatterns #-}
|
||||||
{-# language MagicHash #-}
|
{-# language BinaryLiterals #-}
|
||||||
{-# language UnboxedTuples #-}
|
|
||||||
{-# language RankNTypes #-}
|
|
||||||
{-# language LambdaCase #-}
|
|
||||||
{-# language TypeOperators #-}
|
|
||||||
{-# language DataKinds #-}
|
{-# language DataKinds #-}
|
||||||
|
{-# language KindSignatures #-}
|
||||||
|
{-# language LambdaCase #-}
|
||||||
|
{-# language MagicHash #-}
|
||||||
|
{-# language RankNTypes #-}
|
||||||
|
{-# language ScopedTypeVariables #-}
|
||||||
{-# language TypeApplications #-}
|
{-# language TypeApplications #-}
|
||||||
|
{-# language TypeOperators #-}
|
||||||
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
-- | The functions in this module do not check to
|
-- | The functions in this module are explict in the amount of bytes they require.
|
||||||
-- see if there is enough space in the buffer.
|
module Data.ByteArray.Builder.Bounded
|
||||||
module Data.ByteArray.Builder.Small.Unsafe
|
|
||||||
( -- * Builder
|
( -- * Builder
|
||||||
Builder(..)
|
Builder
|
||||||
, construct
|
|
||||||
-- * Execute
|
-- * Execute
|
||||||
, run
|
, run
|
||||||
, pasteST
|
|
||||||
, pasteGrowST
|
, pasteGrowST
|
||||||
, pasteIO
|
|
||||||
-- * Combine
|
-- * Combine
|
||||||
|
, empty
|
||||||
, append
|
, append
|
||||||
|
-- * Bounds Manipulation
|
||||||
|
, weaken
|
||||||
|
, substitute
|
||||||
-- * Encode Integral Types
|
-- * Encode Integral Types
|
||||||
-- ** Human-Readable
|
-- ** Human-Readable
|
||||||
, word64Dec
|
, word64Dec
|
||||||
|
@ -33,6 +33,8 @@ module Data.ByteArray.Builder.Small.Unsafe
|
||||||
, word32PaddedUpperHex
|
, word32PaddedUpperHex
|
||||||
, word16PaddedUpperHex
|
, word16PaddedUpperHex
|
||||||
, word8PaddedUpperHex
|
, word8PaddedUpperHex
|
||||||
|
, ascii
|
||||||
|
, char
|
||||||
-- ** Machine-Readable
|
-- ** Machine-Readable
|
||||||
, word64BE
|
, word64BE
|
||||||
, word32BE
|
, word32BE
|
||||||
|
@ -42,95 +44,91 @@ module Data.ByteArray.Builder.Small.Unsafe
|
||||||
, doubleDec
|
, doubleDec
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Arithmetic.Types (type (<=), type (:=:))
|
||||||
import Control.Monad.Primitive
|
import Control.Monad.Primitive
|
||||||
import Control.Monad.ST
|
import Control.Monad.ST
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
|
import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Primitive
|
import Data.Primitive
|
||||||
import GHC.Exts
|
|
||||||
import GHC.ST
|
|
||||||
import GHC.Word
|
|
||||||
import GHC.Int
|
|
||||||
import Data.Kind
|
|
||||||
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
|
|
||||||
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||||
import Control.Monad (when)
|
import GHC.Exts
|
||||||
|
import GHC.Int (Int64(I64#))
|
||||||
|
import GHC.ST (ST(ST))
|
||||||
|
import GHC.TypeLits (type (+))
|
||||||
|
import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
|
||||||
|
|
||||||
|
import qualified Arithmetic.Types as Arithmetic
|
||||||
|
import qualified Arithmetic.Nat as Nat
|
||||||
|
import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe
|
||||||
import qualified Data.Primitive as PM
|
import qualified Data.Primitive as PM
|
||||||
|
|
||||||
-- | A builder parameterized by the maximum number of bytes it uses
|
-- | Execute the bounded builder. If the size is a constant,
|
||||||
-- when executed.
|
-- use @Arithmetic.Nat.constant@ as the first argument to let
|
||||||
newtype Builder :: Nat -> Type where
|
-- GHC conjure up this value for you.
|
||||||
Builder ::
|
run ::
|
||||||
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
Arithmetic.Nat n
|
||||||
-> Builder n
|
-> Builder n -- ^ Builder
|
||||||
|
|
||||||
-- | Execute the builder. This function is safe.
|
|
||||||
run :: forall n. KnownNat n
|
|
||||||
=> Builder n -- ^ Builder
|
|
||||||
-> ByteArray
|
-> ByteArray
|
||||||
{-# inline run #-}
|
{-# inline run #-}
|
||||||
run b = runST $ do
|
run n b = runST $ do
|
||||||
arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
|
arr <- newByteArray (Nat.demote n)
|
||||||
len <- pasteST b arr 0
|
len <- Unsafe.pasteST b arr 0
|
||||||
shrinkMutableByteArray arr len
|
shrinkMutableByteArray arr len
|
||||||
unsafeFreezeByteArray arr
|
unsafeFreezeByteArray arr
|
||||||
|
|
||||||
-- | This function does not enforce the known upper bound on the
|
|
||||||
-- size. It is up to the user to do this.
|
|
||||||
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
|
|
||||||
{-# inline pasteST #-}
|
|
||||||
pasteST (Builder f) (MutableByteArray arr) (I# off) =
|
|
||||||
ST $ \s0 -> case f arr off s0 of
|
|
||||||
(# s1, r #) -> (# s1, (I# r) #)
|
|
||||||
|
|
||||||
-- | Paste the builder into the byte array starting at offset zero.
|
-- | Paste the builder into the byte array starting at offset zero.
|
||||||
-- This reallocates the byte array if it cannot accomodate the builder,
|
-- This reallocates the byte array if it cannot accomodate the builder,
|
||||||
-- growing it by the minimum amount necessary.
|
-- growing it by the minimum amount necessary.
|
||||||
pasteGrowST :: forall n s. KnownNat n
|
pasteGrowST ::
|
||||||
=> Builder n
|
Arithmetic.Nat n
|
||||||
|
-> Builder n
|
||||||
-> MutableByteArrayOffset s
|
-> MutableByteArrayOffset s
|
||||||
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
-- ^ Initial buffer, used linearly. Do not reuse this argument.
|
||||||
-> ST s (MutableByteArrayOffset s)
|
-> ST s (MutableByteArrayOffset s)
|
||||||
-- ^ Final buffer that accomodated the builder.
|
-- ^ Final buffer that accomodated the builder.
|
||||||
{-# inline pasteGrowST #-}
|
{-# inline pasteGrowST #-}
|
||||||
pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
|
pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
|
||||||
sz0 <- PM.getSizeofMutableByteArray arr0
|
sz0 <- PM.getSizeofMutableByteArray arr0
|
||||||
let req = fromIntegral (natVal' (proxy# :: Proxy# n))
|
let req = Nat.demote n
|
||||||
let sz1 = off0 + req
|
let sz1 = off0 + req
|
||||||
if sz1 <= sz0
|
if sz1 <= sz0
|
||||||
then do
|
then do
|
||||||
off1 <- pasteST b arr0 off0
|
off1 <- Unsafe.pasteST b arr0 off0
|
||||||
pure (MutableByteArrayOffset arr0 off1)
|
pure (MutableByteArrayOffset arr0 off1)
|
||||||
else do
|
else do
|
||||||
arr1 <- PM.resizeMutableByteArray arr0 sz1
|
arr1 <- PM.resizeMutableByteArray arr0 sz1
|
||||||
off1 <- pasteST b arr1 off0
|
off1 <- Unsafe.pasteST b arr1 off0
|
||||||
pure (MutableByteArrayOffset arr1 off1)
|
pure (MutableByteArrayOffset arr1 off1)
|
||||||
|
|
||||||
-- | This function does not enforce the known upper bound on the
|
-- | The monoidal unit of `append`
|
||||||
-- size. It is up to the user to do this.
|
empty :: Builder 0
|
||||||
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
|
empty = Builder $ \_ off0 s0 -> (# s0, off0 #)
|
||||||
{-# inline pasteIO #-}
|
|
||||||
pasteIO b m off = stToIO (pasteST b m off)
|
|
||||||
|
|
||||||
-- | Constructor for 'Builder' that works on a function with lifted
|
|
||||||
-- arguments instead of unlifted ones. This is just as unsafe as the
|
|
||||||
-- actual constructor.
|
|
||||||
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
|
|
||||||
{-# inline construct #-}
|
|
||||||
construct f = Builder
|
|
||||||
$ \arr off s0 ->
|
|
||||||
case unST (f (MutableByteArray arr) (I# off)) s0 of
|
|
||||||
(# s1, (I# n) #) -> (# s1, n #)
|
|
||||||
|
|
||||||
infixr 9 `append`
|
infixr 9 `append`
|
||||||
|
|
||||||
-- | Concatenate two builders.
|
-- | Concatenate two builders.
|
||||||
append :: Builder n -> Builder m -> Builder (n + m)
|
append :: Builder m -> Builder n -> Builder (m + n)
|
||||||
append (Builder f) (Builder g) =
|
append (Builder f) (Builder g) =
|
||||||
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
Builder $ \arr off0 s0 -> case f arr off0 s0 of
|
||||||
(# s1, r #) -> g arr r s1
|
(# s1, r #) -> g arr r s1
|
||||||
|
|
||||||
|
-- | Weaken the bound on the maximum number of bytes required. For example,
|
||||||
|
-- to use two builders with unequal bounds in a disjunctive setting:
|
||||||
|
--
|
||||||
|
-- > import qualified Arithmetic.Lte as Lte
|
||||||
|
-- >
|
||||||
|
-- > buildNumber :: Either Double Word64 -> Builder 32
|
||||||
|
-- > buildNumber = \case
|
||||||
|
-- > Left d -> doubleDec d
|
||||||
|
-- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w)
|
||||||
|
weaken :: forall m n. (m <= n) -> Builder m -> Builder n
|
||||||
|
weaken !_ (Builder f) = Builder f
|
||||||
|
|
||||||
|
-- | Replace the upper bound on size with an equal number.
|
||||||
|
substitute :: forall m n. (m :=: n) -> Builder m -> Builder n
|
||||||
|
substitute !_ (Builder f) = Builder f
|
||||||
|
|
||||||
-- | Encode a double-floating-point number, using decimal notation or
|
-- | Encode a double-floating-point number, using decimal notation or
|
||||||
-- scientific notation depending on the magnitude. This has undefined
|
-- scientific notation depending on the magnitude. This has undefined
|
||||||
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
|
||||||
|
@ -164,7 +162,7 @@ int64Dec (I64# w) = int64Dec# w
|
||||||
-- the word. This is only used internally.
|
-- the word. This is only used internally.
|
||||||
wordCommonDec# :: Word# -> Builder n
|
wordCommonDec# :: Word# -> Builder n
|
||||||
{-# noinline wordCommonDec# #-}
|
{-# noinline wordCommonDec# #-}
|
||||||
wordCommonDec# w# = construct $ \arr off0 -> if w /= 0
|
wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
|
||||||
then internalWordLoop arr off0 (W# w#)
|
then internalWordLoop arr off0 (W# w#)
|
||||||
else do
|
else do
|
||||||
writeByteArray arr off0 (c2w '0')
|
writeByteArray arr off0 (c2w '0')
|
||||||
|
@ -187,7 +185,7 @@ internalWordLoop arr off0 x0 = go off0 x0 where
|
||||||
-- | Requires up to 19 bytes.
|
-- | Requires up to 19 bytes.
|
||||||
int64Dec# :: Int# -> Builder 20
|
int64Dec# :: Int# -> Builder 20
|
||||||
{-# noinline int64Dec# #-}
|
{-# noinline int64Dec# #-}
|
||||||
int64Dec# w# = construct $ \arr off0 -> case compare w 0 of
|
int64Dec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of
|
||||||
GT -> internalWordLoop arr off0 (fromIntegral w)
|
GT -> internalWordLoop arr off0 (fromIntegral w)
|
||||||
EQ -> do
|
EQ -> do
|
||||||
writeByteArray arr off0 (c2w '0')
|
writeByteArray arr off0 (c2w '0')
|
||||||
|
@ -243,7 +241,7 @@ word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# w
|
||||||
-- might not be. Benchmark this.
|
-- might not be. Benchmark this.
|
||||||
word64PaddedUpperHex# :: Word# -> Builder 16
|
word64PaddedUpperHex# :: Word# -> Builder 16
|
||||||
{-# noinline word64PaddedUpperHex# #-}
|
{-# noinline word64PaddedUpperHex# #-}
|
||||||
word64PaddedUpperHex# w# = construct $ \arr off -> do
|
word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
|
writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
|
||||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
|
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
|
||||||
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
|
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
|
||||||
|
@ -266,7 +264,7 @@ word64PaddedUpperHex# w# = construct $ \arr off -> do
|
||||||
|
|
||||||
word32PaddedUpperHex# :: Word# -> Builder 8
|
word32PaddedUpperHex# :: Word# -> Builder 8
|
||||||
{-# noinline word32PaddedUpperHex# #-}
|
{-# noinline word32PaddedUpperHex# #-}
|
||||||
word32PaddedUpperHex# w# = construct $ \arr off -> do
|
word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 28))
|
writeByteArray arr off (toHexUpper (unsafeShiftR w 28))
|
||||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24))
|
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24))
|
||||||
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20))
|
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20))
|
||||||
|
@ -283,7 +281,7 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
|
||||||
-- GHC make the decision. Open an issue on github if this is
|
-- GHC make the decision. Open an issue on github if this is
|
||||||
-- a problem.
|
-- a problem.
|
||||||
word16PaddedUpperHex# :: Word# -> Builder 4
|
word16PaddedUpperHex# :: Word# -> Builder 4
|
||||||
word16PaddedUpperHex# w# = construct $ \arr off -> do
|
word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 12))
|
writeByteArray arr off (toHexUpper (unsafeShiftR w 12))
|
||||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8))
|
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8))
|
||||||
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4))
|
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4))
|
||||||
|
@ -295,17 +293,87 @@ word16PaddedUpperHex# w# = construct $ \arr off -> do
|
||||||
-- Definitely want this to inline. It's maybe a dozen instructions total.
|
-- Definitely want this to inline. It's maybe a dozen instructions total.
|
||||||
word8PaddedUpperHex# :: Word# -> Builder 2
|
word8PaddedUpperHex# :: Word# -> Builder 2
|
||||||
{-# inline word8PaddedUpperHex #-}
|
{-# inline word8PaddedUpperHex #-}
|
||||||
word8PaddedUpperHex# w# = construct $ \arr off -> do
|
word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off (toHexUpper (unsafeShiftR w 4))
|
writeByteArray arr off (toHexUpper (unsafeShiftR w 4))
|
||||||
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0))
|
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0))
|
||||||
pure (off + 2)
|
pure (off + 2)
|
||||||
where
|
where
|
||||||
w = W# w#
|
w = W# w#
|
||||||
|
|
||||||
|
-- | Encode an ASCII char.
|
||||||
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
|
ascii :: Char -> Builder 1
|
||||||
|
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
|
||||||
|
|
||||||
|
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
||||||
|
char :: Char -> Builder 4
|
||||||
|
char c
|
||||||
|
| codepoint < 0x80 = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 codepoint)
|
||||||
|
pure (off + 1)
|
||||||
|
| codepoint < 0x800 = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
|
||||||
|
return (off + 2)
|
||||||
|
| codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do
|
||||||
|
-- Codepoint U+FFFD
|
||||||
|
writeByteArray arr off (0xEF :: Word8)
|
||||||
|
writeByteArray arr (off + 1) (0xBF :: Word8)
|
||||||
|
writeByteArray arr (off + 2) (0xBD :: Word8)
|
||||||
|
return (off + 3)
|
||||||
|
| codepoint < 0x10000 = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
|
||||||
|
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
|
||||||
|
return (off + 3)
|
||||||
|
| otherwise = Unsafe.construct $ \arr off -> do
|
||||||
|
writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint))
|
||||||
|
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
|
||||||
|
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
|
||||||
|
writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint))
|
||||||
|
return (off + 4)
|
||||||
|
|
||||||
|
where
|
||||||
|
codepoint :: Word
|
||||||
|
codepoint = fromIntegral (ord c)
|
||||||
|
|
||||||
|
unsafeWordToWord8 :: Word -> Word8
|
||||||
|
unsafeWordToWord8 (W# w) = W8# w
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x800
|
||||||
|
byteTwoOne :: Word -> Word
|
||||||
|
byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000
|
||||||
|
|
||||||
|
byteTwoTwo :: Word -> Word
|
||||||
|
byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x1000
|
||||||
|
byteThreeOne :: Word -> Word
|
||||||
|
byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000
|
||||||
|
|
||||||
|
byteThreeTwo :: Word -> Word
|
||||||
|
byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
|
||||||
|
|
||||||
|
byteThreeThree :: Word -> Word
|
||||||
|
byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000
|
||||||
|
|
||||||
|
-- precondition: codepoint is less than 0x110000
|
||||||
|
byteFourOne :: Word -> Word
|
||||||
|
byteFourOne w = unsafeShiftR w 18 .|. 0b11110000
|
||||||
|
|
||||||
|
byteFourTwo :: Word -> Word
|
||||||
|
byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000
|
||||||
|
|
||||||
|
byteFourThree :: Word -> Word
|
||||||
|
byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
|
||||||
|
|
||||||
|
byteFourFour :: Word -> Word
|
||||||
|
byteFourFour w = (0b00111111 .&. w) .|. 0b10000000
|
||||||
|
|
||||||
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word64BE :: Word64 -> Builder 8
|
word64BE :: Word64 -> Builder 8
|
||||||
word64BE w = construct $ \arr off -> do
|
word64BE w = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
|
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
|
||||||
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
|
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
|
||||||
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
|
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
|
||||||
|
@ -319,7 +387,7 @@ word64BE w = construct $ \arr off -> do
|
||||||
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word32BE :: Word32 -> Builder 4
|
word32BE :: Word32 -> Builder 4
|
||||||
word32BE w = construct $ \arr off -> do
|
word32BE w = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
|
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
|
||||||
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
|
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
|
||||||
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
|
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
|
||||||
|
@ -329,13 +397,13 @@ word32BE w = construct $ \arr off -> do
|
||||||
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
|
||||||
-- word in a big-endian fashion.
|
-- word in a big-endian fashion.
|
||||||
word16BE :: Word16 -> Builder 2
|
word16BE :: Word16 -> Builder 2
|
||||||
word16BE w = construct $ \arr off -> do
|
word16BE w = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
|
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
|
||||||
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
|
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
|
||||||
pure (off + 2)
|
pure (off + 2)
|
||||||
|
|
||||||
word8 :: Word8 -> Builder 1
|
word8 :: Word8 -> Builder 1
|
||||||
word8 w = construct $ \arr off -> do
|
word8 w = Unsafe.construct $ \arr off -> do
|
||||||
writeByteArray arr off w
|
writeByteArray arr off w
|
||||||
pure (off + 1)
|
pure (off + 1)
|
||||||
|
|
||||||
|
@ -356,9 +424,6 @@ reverseBytes arr begin end = go begin end where
|
||||||
c2w :: Char -> Word8
|
c2w :: Char -> Word8
|
||||||
c2w = fromIntegral . ord
|
c2w = fromIntegral . ord
|
||||||
|
|
||||||
unST :: ST s a -> State# s -> (# State# s, a #)
|
|
||||||
unST (ST f) = f
|
|
||||||
|
|
||||||
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
|
||||||
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||||
primitive_ (shrinkMutableByteArray# arr sz)
|
primitive_ (shrinkMutableByteArray# arr sz)
|
59
src/Data/ByteArray/Builder/Bounded/Unsafe.hs
Normal file
59
src/Data/ByteArray/Builder/Bounded/Unsafe.hs
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
{-# language DataKinds #-}
|
||||||
|
{-# language GADTSyntax #-}
|
||||||
|
{-# language KindSignatures #-}
|
||||||
|
{-# language MagicHash #-}
|
||||||
|
{-# language RankNTypes #-}
|
||||||
|
{-# language ScopedTypeVariables #-}
|
||||||
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
|
module Data.ByteArray.Builder.Bounded.Unsafe
|
||||||
|
( -- * Types
|
||||||
|
Builder(..)
|
||||||
|
-- * Construct
|
||||||
|
, construct
|
||||||
|
-- * Run
|
||||||
|
, pasteST
|
||||||
|
, pasteIO
|
||||||
|
) where
|
||||||
|
|
||||||
|
import GHC.TypeLits (Nat)
|
||||||
|
import Data.Kind (Type)
|
||||||
|
import GHC.IO (stToIO)
|
||||||
|
import GHC.ST (ST(ST))
|
||||||
|
import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#)
|
||||||
|
import Data.Primitive (MutableByteArray(..))
|
||||||
|
|
||||||
|
-- | A builder parameterized by the maximum number of bytes it uses
|
||||||
|
-- when executed.
|
||||||
|
newtype Builder :: Nat -> Type where
|
||||||
|
Builder ::
|
||||||
|
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
|
||||||
|
-> Builder n
|
||||||
|
|
||||||
|
-- | Constructor for 'Builder' that works on a function with lifted
|
||||||
|
-- arguments instead of unlifted ones. This is just as unsafe as the
|
||||||
|
-- actual constructor.
|
||||||
|
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
|
||||||
|
{-# inline construct #-}
|
||||||
|
construct f = Builder
|
||||||
|
$ \arr off s0 ->
|
||||||
|
case unST (f (MutableByteArray arr) (I# off)) s0 of
|
||||||
|
(# s1, (I# n) #) -> (# s1, n #)
|
||||||
|
|
||||||
|
-- | This function does not enforce the known upper bound on the
|
||||||
|
-- size. It is up to the user to do this.
|
||||||
|
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
|
||||||
|
{-# inline pasteST #-}
|
||||||
|
pasteST (Builder f) (MutableByteArray arr) (I# off) =
|
||||||
|
ST $ \s0 -> case f arr off s0 of
|
||||||
|
(# s1, r #) -> (# s1, (I# r) #)
|
||||||
|
|
||||||
|
-- | This function does not enforce the known upper bound on the
|
||||||
|
-- size. It is up to the user to do this.
|
||||||
|
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
|
||||||
|
{-# inline pasteIO #-}
|
||||||
|
pasteIO b m off = stToIO (pasteST b m off)
|
||||||
|
|
||||||
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||||
|
unST (ST f) = f
|
||||||
|
|
94
src/Data/ByteArray/Builder/Unsafe.hs
Normal file
94
src/Data/ByteArray/Builder/Unsafe.hs
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
{-# language BangPatterns #-}
|
||||||
|
{-# language DuplicateRecordFields #-}
|
||||||
|
{-# language LambdaCase #-}
|
||||||
|
{-# language MagicHash #-}
|
||||||
|
{-# language RankNTypes #-}
|
||||||
|
{-# language ScopedTypeVariables #-}
|
||||||
|
{-# language UnboxedTuples #-}
|
||||||
|
|
||||||
|
module Data.ByteArray.Builder.Unsafe
|
||||||
|
( -- * Types
|
||||||
|
Builder(..)
|
||||||
|
-- * Safe Functions
|
||||||
|
-- | These functions are actually completely safe, but they are defined
|
||||||
|
-- here because they are used by typeclass instances. Import them from
|
||||||
|
-- @Data.ByteArray.Builder@ instead.
|
||||||
|
, stringUtf8
|
||||||
|
, cstring
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Primitive (MutableByteArray(MutableByteArray))
|
||||||
|
import Foreign.C.String (CString)
|
||||||
|
import GHC.Exts ((-#),(+#),(/=#),(>#))
|
||||||
|
import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
|
||||||
|
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
|
||||||
|
import GHC.ST (ST(ST))
|
||||||
|
import GHC.Base (unpackCString#,unpackCStringUtf8#)
|
||||||
|
|
||||||
|
import qualified GHC.Exts as Exts
|
||||||
|
import qualified Data.ByteArray.Builder.Bounded as Bounded
|
||||||
|
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
|
||||||
|
|
||||||
|
-- | An unmaterialized sequence of bytes that may be pasted
|
||||||
|
-- into a mutable byte array.
|
||||||
|
newtype Builder = Builder
|
||||||
|
-- This functions takes an offset and a number of remaining bytes
|
||||||
|
-- and returns the new offset.
|
||||||
|
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
|
||||||
|
|
||||||
|
instance IsString Builder where
|
||||||
|
{-# inline fromString #-}
|
||||||
|
fromString = stringUtf8
|
||||||
|
|
||||||
|
instance Semigroup Builder where
|
||||||
|
{-# inline (<>) #-}
|
||||||
|
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
|
||||||
|
(# s1, r #) -> case r /=# (-1#) of
|
||||||
|
1# -> g arr r (len0 +# (off0 -# r)) s1
|
||||||
|
_ -> (# s1, (-1#) #)
|
||||||
|
|
||||||
|
instance Monoid Builder where
|
||||||
|
{-# inline mempty #-}
|
||||||
|
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
|
||||||
|
|
||||||
|
-- | Create a builder from a cons-list of 'Char'. These
|
||||||
|
-- are be UTF-8 encoded.
|
||||||
|
stringUtf8 :: String -> Builder
|
||||||
|
{-# inline stringUtf8 #-}
|
||||||
|
stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
|
||||||
|
|
||||||
|
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
|
||||||
|
-- textual encoding, copying bytes until @NUL@ is reached.
|
||||||
|
cstring :: CString -> Builder
|
||||||
|
{-# inline cstring #-}
|
||||||
|
cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
|
||||||
|
|
||||||
|
goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
||||||
|
{-# noinline goString #-}
|
||||||
|
goString [] _ off0 _ s0 = (# s0, off0 #)
|
||||||
|
goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
|
||||||
|
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
|
||||||
|
(# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
|
||||||
|
_ -> (# s0, (-1#) #)
|
||||||
|
|
||||||
|
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
|
||||||
|
-- since GHC uses a different function based on whether or not non-ASCII
|
||||||
|
-- codepoints are used in the string.
|
||||||
|
{-# RULES
|
||||||
|
"Builder stringUtf8/cstring" forall s a b c d.
|
||||||
|
goString (unpackCString# s) a b c d = goCString s a b c d
|
||||||
|
"Builder stringUtf8/cstring-utf8" forall s a b c d.
|
||||||
|
goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
|
||||||
|
#-}
|
||||||
|
|
||||||
|
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
|
||||||
|
goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
|
||||||
|
0## -> (# s0, off0 #)
|
||||||
|
w -> case len0 of
|
||||||
|
0# -> (# s0, (-1#) #)
|
||||||
|
_ -> case Exts.writeWord8Array# buf off0 w s0 of
|
||||||
|
s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
|
||||||
|
|
||||||
|
unST :: ST s a -> State# s -> (# State# s, a #)
|
||||||
|
unST (ST f) = f
|
||||||
|
|
29
test/Main.hs
29
test/Main.hs
|
@ -5,24 +5,27 @@
|
||||||
|
|
||||||
import Control.Monad.ST (runST)
|
import Control.Monad.ST (runST)
|
||||||
import Data.Bytes.Types (MutableBytes(..))
|
import Data.Bytes.Types (MutableBytes(..))
|
||||||
import Data.ByteArray.Builder.Small
|
import Data.ByteArray.Builder
|
||||||
import Data.Word
|
import Data.Word
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Primitive (ByteArray)
|
import Data.Primitive (ByteArray)
|
||||||
import Debug.Trace
|
|
||||||
import Test.Tasty (defaultMain,testGroup,TestTree)
|
import Test.Tasty (defaultMain,testGroup,TestTree)
|
||||||
import Test.QuickCheck ((===))
|
import Test.QuickCheck ((===))
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Test.Tasty.HUnit ((@=?))
|
import Test.Tasty.HUnit ((@=?))
|
||||||
|
|
||||||
|
import qualified Arithmetic.Nat as Nat
|
||||||
|
import qualified Data.ByteString as ByteString
|
||||||
import qualified Data.ByteString.Builder as BB
|
import qualified Data.ByteString.Builder as BB
|
||||||
import qualified Data.Primitive as PM
|
|
||||||
import qualified Data.List as L
|
|
||||||
import qualified Data.Vector as V
|
|
||||||
import qualified Test.Tasty.QuickCheck as TQC
|
|
||||||
import qualified Test.QuickCheck as QC
|
|
||||||
import qualified GHC.Exts as Exts
|
|
||||||
import qualified Data.ByteString.Lazy.Char8 as LB
|
import qualified Data.ByteString.Lazy.Char8 as LB
|
||||||
|
import qualified Data.List as L
|
||||||
|
import qualified Data.Primitive as PM
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as TE
|
||||||
|
import qualified Data.Vector as V
|
||||||
|
import qualified GHC.Exts as Exts
|
||||||
import qualified Test.Tasty.HUnit as THU
|
import qualified Test.Tasty.HUnit as THU
|
||||||
|
import qualified Test.Tasty.QuickCheck as TQC
|
||||||
|
|
||||||
import qualified HexWord64
|
import qualified HexWord64
|
||||||
|
|
||||||
|
@ -54,6 +57,9 @@ tests = testGroup "Tests"
|
||||||
(runArray word64Dec (V.fromList xs))
|
(runArray word64Dec (V.fromList xs))
|
||||||
===
|
===
|
||||||
pack (foldMap show xs)
|
pack (foldMap show xs)
|
||||||
|
, THU.testCase "stringUtf8" $
|
||||||
|
packUtf8 "¿Cómo estás? I am doing well." @=?
|
||||||
|
run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
|
||||||
, THU.testCase "doubleDec-A" $
|
, THU.testCase "doubleDec-A" $
|
||||||
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
|
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
|
||||||
, THU.testCase "doubleDec-B" $
|
, THU.testCase "doubleDec-B" $
|
||||||
|
@ -88,8 +94,8 @@ tests = testGroup "Tests"
|
||||||
, testGroup "alternate"
|
, testGroup "alternate"
|
||||||
[ TQC.testProperty "HexWord64" $ \x y ->
|
[ TQC.testProperty "HexWord64" $ \x y ->
|
||||||
run 1
|
run 1
|
||||||
( fromUnsafe (HexWord64.word64PaddedUpperHex x)
|
( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
|
||||||
<> fromUnsafe (HexWord64.word64PaddedUpperHex y)
|
<> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
|
||||||
)
|
)
|
||||||
===
|
===
|
||||||
pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)
|
pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)
|
||||||
|
@ -99,6 +105,9 @@ tests = testGroup "Tests"
|
||||||
pack :: String -> ByteArray
|
pack :: String -> ByteArray
|
||||||
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
|
||||||
|
|
||||||
|
packUtf8 :: String -> ByteArray
|
||||||
|
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
|
||||||
|
|
||||||
-- This is used to test pasteArrayST
|
-- This is used to test pasteArrayST
|
||||||
runArray ::
|
runArray ::
|
||||||
(a -> Builder) -- ^ Builder
|
(a -> Builder) -- ^ Builder
|
||||||
|
|
Loading…
Reference in a new issue