Merge pull request #4 from andrewthad/bounded

Switch unsafe to bounded
This commit is contained in:
Alice McKean 2019-09-03 12:03:46 -07:00 committed by GitHub
commit 6217c8b8e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 405 additions and 157 deletions

View file

@ -1,8 +1,10 @@
import Data.Primitive (ByteArray)
import Data.Word (Word64)
import Gauge (bgroup,bench,whnf)
import Gauge.Main (defaultMain)
import Data.Word (Word64)
import Data.Primitive (ByteArray)
import qualified Data.ByteArray.Builder.Small.Unsafe as U
import qualified Arithmetic.Nat as Nat
import qualified Data.ByteArray.Builder.Bounded as U
import qualified HexWord64
@ -33,7 +35,7 @@ data Word64s = Word64s
encodeHexWord64s :: Word64s -> ByteArray
{-# noinline encodeHexWord64s #-}
encodeHexWord64s (Word64s a b c d e f g h) = U.run $
encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $
U.word64PaddedUpperHex a `U.append`
U.word64PaddedUpperHex b `U.append`
U.word64PaddedUpperHex c `U.append`
@ -45,7 +47,7 @@ encodeHexWord64s (Word64s a b c d e f g h) = U.run $
encodeHexWord64sLoop :: Word64s -> ByteArray
{-# noinline encodeHexWord64sLoop #-}
encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run $
encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $
HexWord64.word64PaddedUpperHex a `U.append`
HexWord64.word64PaddedUpperHex b `U.append`
HexWord64.word64PaddedUpperHex c `U.append`

5
cabal.project Normal file
View file

@ -0,0 +1,5 @@
packages: .
source-repository-package
type: git
location: https://github.com/andrewthad/natural-arithmetic
tag: 68868c96b58ddaf71bb865b247d2c14c3668f4c2

View file

@ -17,7 +17,7 @@ module HexWord64
import GHC.ST (ST(ST))
import Data.Bits
import Data.ByteArray.Builder.Small.Unsafe (Builder,construct)
import Data.ByteArray.Builder.Bounded.Unsafe (Builder,construct)
import Data.Primitive
import Data.Word
import GHC.Exts

View file

@ -36,8 +36,10 @@ flag checked
library
exposed-modules:
Data.ByteArray.Builder.Small
Data.ByteArray.Builder.Small.Unsafe
Data.ByteArray.Builder
Data.ByteArray.Builder.Unsafe
Data.ByteArray.Builder.Bounded
Data.ByteArray.Builder.Bounded.Unsafe
build-depends:
, base >=4.12.0.0 && <5
, byteslice >=0.1 && <0.2
@ -46,6 +48,7 @@ library
, vector >=0.12.0.3 && <0.13
, bytestring >=0.10.8.2 && <0.11
, text-short >=0.1.3 && <0.2
, natural-arithmetic >=0.1 && <0.2
if flag(checked)
build-depends: primitive-checked >= 0.7 && <0.8
else
@ -59,18 +62,21 @@ test-suite test
type: exitcode-stdio-1.0
hs-source-dirs: test, common
main-is: Main.hs
ghc-options: -O2 -Wall
other-modules:
HexWord64
build-depends:
, QuickCheck >=2.13.1 && <2.14
, base >=4.12.0.0 && <5
, byteslice
, bytestring
, small-bytearray-builder
, QuickCheck >=2.13.1 && <2.14
, tasty-quickcheck >=0.10.1 && <0.11
, tasty-hunit >=0.10.0.2 && <0.11
, tasty >=1.2.3 && <1.3
, natural-arithmetic
, primitive
, small-bytearray-builder
, tasty >=1.2.3 && <1.3
, tasty-hunit >=0.10.0.2 && <0.11
, tasty-quickcheck >=0.10.1 && <0.11
, text >=1.2 && <1.3
, vector
benchmark bench
@ -78,6 +84,7 @@ benchmark bench
build-depends:
, base
, gauge >= 0.2.4
, natural-arithmetic
, primitive
, small-bytearray-builder
ghc-options: -Wall -O2

View file

@ -6,11 +6,11 @@
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Small
( -- * Unsafe Primitives
module Data.ByteArray.Builder
( -- * Bounded Primitives
Builder(..)
, construct
, fromUnsafe
, fromBounded
-- * Evaluation
, run
, pasteST
@ -24,6 +24,8 @@ module Data.ByteArray.Builder.Small
, bytearray
, shortTextUtf8
, shortTextJsonString
, cstring
, stringUtf8
-- * Encode Integral Types
-- ** Human-Readable
, word64Dec
@ -34,10 +36,13 @@ module Data.ByteArray.Builder.Small
, word32PaddedUpperHex
, word16PaddedUpperHex
, word8PaddedUpperHex
, ascii
, char
-- ** Machine-Readable
, word64BE
, word32BE
, word16BE
, word8
-- * Encode Floating-Point Types
-- ** Human-Readable
, doubleDec
@ -46,43 +51,27 @@ module Data.ByteArray.Builder.Small
import Control.Monad.Primitive
import Control.Monad.ST
import Control.Monad.ST.Run (runByteArrayST)
import Data.Bytes.Types
import Data.Primitive
import Data.Int (Int64)
import GHC.Exts
import GHC.ST
import GHC.Word
import GHC.TypeLits (KnownNat,natVal')
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
import Data.ByteString.Short.Internal (ShortByteString(SBS))
import Data.Text.Short (ShortText)
import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
import Data.Char (ord)
import Data.Int (Int64)
import Data.Primitive
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import Data.Text.Short (ShortText)
import GHC.Exts
import GHC.ST (ST(ST))
import GHC.Word
import qualified Arithmetic.Nat as Nat
import qualified Arithmetic.Types as Arithmetic
import qualified GHC.Exts as Exts
import qualified Data.Text.Short as TS
import qualified Data.Primitive as PM
import qualified Data.Vector as V
import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
-- | An unmaterialized sequence of bytes that may be pasted
-- into a mutable byte array.
newtype Builder = Builder
-- This functions takes an offset and a number of remaining bytes
-- and returns the new offset.
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
instance Semigroup Builder where
{-# inline (<>) #-}
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
(# s1, r #) -> case r /=# (-1#) of
1# -> g arr r (len0 +# (off0 -# r)) s1
_ -> (# s1, (-1#) #)
instance Monoid Builder where
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
instance IsString Builder where
fromString = shortTextUtf8 . TS.fromString
import qualified Data.ByteArray.Builder.Bounded as Bounded
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
-- | Run a builder. An accurate size hint is important for good performance.
-- The size hint should be slightly larger than the actual size.
@ -98,7 +87,7 @@ run hint b = runByteArrayST $ do
Just len -> do
shrinkMutableByteArray arr len
unsafeFreezeByteArray arr
go hint
go (max hint 1)
-- | Variant of 'pasteArrayST' that runs in 'IO'.
pasteArrayIO ::
@ -183,11 +172,17 @@ construct f = Builder
Nothing -> (# s1, (-1#) #)
Just (I# n) -> (# s1, n #)
fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
{-# inline fromUnsafe #-}
fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
case fromIntegral (natVal' (proxy# :: Proxy# n)) of
I# req -> case len >=# req of
-- | Convert a bounded builder to an unbounded one. If the size
-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
-- to let GHC conjure up this value for you.
fromBounded ::
Arithmetic.Nat n
-> Bounded.Builder n
-> Builder
{-# inline fromBounded #-}
fromBounded n (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
let !(I# req) = Nat.demote n in
case len >=# req of
1# -> f arr off s0
_ -> (# s0, (-1#) #)
@ -223,8 +218,8 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
else do
write2 dst doff '\\' 'u'
doff' <- Unsafe.pasteST
(Unsafe.word16PaddedUpperHex (fromIntegral (c2w c)))
doff' <- UnsafeBounded.pasteST
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
dst (doff + 2)
go (soff + 1) (slen - 1) doff'
else pure doff
@ -262,61 +257,70 @@ shortTextJsonString a =
-- This encoding never starts with a zero unless the
-- argument was zero.
word64Dec :: Word64 -> Builder
word64Dec w = fromUnsafe (Unsafe.word64Dec w)
word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w)
-- | Encodes an unsigned 16-bit integer as decimal.
-- This encoding never starts with a zero unless the
-- argument was zero.
word32Dec :: Word32 -> Builder
word32Dec w = fromUnsafe (Unsafe.word32Dec w)
word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w)
-- | Encodes an unsigned 16-bit integer as decimal.
-- This encoding never starts with a zero unless the
-- argument was zero.
word16Dec :: Word16 -> Builder
word16Dec w = fromUnsafe (Unsafe.word16Dec w)
word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w)
-- | Encode a double-floating-point number, using decimal notation or
-- scientific notation depending on the magnitude. This has undefined
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
-- crash, but the generated numbers will be nonsense.
doubleDec :: Double -> Builder
doubleDec w = fromUnsafe (Unsafe.doubleDec w)
doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w)
-- | Encodes a signed 64-bit integer as decimal.
-- This encoding never starts with a zero unless the argument was zero.
-- Negative numbers are preceded by a minus sign. Positive numbers
-- are not preceded by anything.
int64Dec :: Int64 -> Builder
int64Dec w = fromUnsafe (Unsafe.int64Dec w)
int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w)
-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
-- the encoding to 16 digits. This uses uppercase for the alphabetical
-- digits. For example, this encodes the number 1022 as @00000000000003FE@.
word64PaddedUpperHex :: Word64 -> Builder
word64PaddedUpperHex w =
fromUnsafe (Unsafe.word64PaddedUpperHex w)
fromBounded Nat.constant (Bounded.word64PaddedUpperHex w)
-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
-- the encoding to 8 digits. This uses uppercase for the alphabetical
-- digits. For example, this encodes the number 1022 as @000003FE@.
word32PaddedUpperHex :: Word32 -> Builder
word32PaddedUpperHex w =
fromUnsafe (Unsafe.word32PaddedUpperHex w)
fromBounded Nat.constant (Bounded.word32PaddedUpperHex w)
-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
-- the encoding to 4 digits. This uses uppercase for the alphabetical
-- digits. For example, this encodes the number 1022 as @03FE@.
word16PaddedUpperHex :: Word16 -> Builder
word16PaddedUpperHex w =
fromUnsafe (Unsafe.word16PaddedUpperHex w)
fromBounded Nat.constant (Bounded.word16PaddedUpperHex w)
-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
-- the encoding to 2 digits. This uses uppercase for the alphabetical
-- digits. For example, this encodes the number 11 as @0B@.
word8PaddedUpperHex :: Word8 -> Builder
word8PaddedUpperHex w =
fromUnsafe (Unsafe.word8PaddedUpperHex w)
fromBounded Nat.constant (Bounded.word8PaddedUpperHex w)
-- | Encode an ASCII char.
-- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder
ascii c = fromBounded Nat.constant (Bounded.char c)
-- | Encode an UTF8 char. This only uses as much space as is required.
char :: Char -> Builder
char c = fromBounded Nat.constant (Bounded.char c)
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f
@ -328,17 +332,20 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
-- word in a big-endian fashion.
word64BE :: Word64 -> Builder
word64BE w = fromUnsafe (Unsafe.word64BE w)
word64BE w = fromBounded Nat.constant (Bounded.word64BE w)
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
-- word in a big-endian fashion.
word32BE :: Word32 -> Builder
word32BE w = fromUnsafe (Unsafe.word32BE w)
word32BE w = fromBounded Nat.constant (Bounded.word32BE w)
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
-- word in a big-endian fashion.
word16BE :: Word16 -> Builder
word16BE w = fromUnsafe (Unsafe.word16BE w)
word16BE w = fromBounded Nat.constant (Bounded.word16BE w)
word8 :: Word8 -> Builder
word8 w = fromBounded Nat.constant (Bounded.word8 w)
-- ShortText is already UTF-8 encoded. This is a no-op.
shortTextToByteArray :: ShortText -> ByteArray

View file

@ -1,28 +1,28 @@
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language ScopedTypeVariables #-}
{-# language BangPatterns #-}
{-# language MagicHash #-}
{-# language UnboxedTuples #-}
{-# language RankNTypes #-}
{-# language LambdaCase #-}
{-# language TypeOperators #-}
{-# language BinaryLiterals #-}
{-# language DataKinds #-}
{-# language KindSignatures #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language TypeApplications #-}
{-# language TypeOperators #-}
{-# language UnboxedTuples #-}
-- | The functions in this module do not check to
-- see if there is enough space in the buffer.
module Data.ByteArray.Builder.Small.Unsafe
-- | The functions in this module are explict in the amount of bytes they require.
module Data.ByteArray.Builder.Bounded
( -- * Builder
Builder(..)
, construct
Builder
-- * Execute
, run
, pasteST
, pasteGrowST
, pasteIO
-- * Combine
, empty
, append
-- * Bounds Manipulation
, weaken
, substitute
-- * Encode Integral Types
-- ** Human-Readable
, word64Dec
@ -33,6 +33,8 @@ module Data.ByteArray.Builder.Small.Unsafe
, word32PaddedUpperHex
, word16PaddedUpperHex
, word8PaddedUpperHex
, ascii
, char
-- ** Machine-Readable
, word64BE
, word32BE
@ -42,95 +44,91 @@ module Data.ByteArray.Builder.Small.Unsafe
, doubleDec
) where
import Arithmetic.Types (type (<=), type (:=:))
import Control.Monad.Primitive
import Control.Monad.ST
import Data.Bits
import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
import Data.Char (ord)
import Data.Primitive
import GHC.Exts
import GHC.ST
import GHC.Word
import GHC.Int
import Data.Kind
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import Control.Monad (when)
import GHC.Exts
import GHC.Int (Int64(I64#))
import GHC.ST (ST(ST))
import GHC.TypeLits (type (+))
import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
import qualified Arithmetic.Types as Arithmetic
import qualified Arithmetic.Nat as Nat
import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe
import qualified Data.Primitive as PM
-- | A builder parameterized by the maximum number of bytes it uses
-- when executed.
newtype Builder :: Nat -> Type where
Builder ::
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
-> Builder n
-- | Execute the builder. This function is safe.
run :: forall n. KnownNat n
=> Builder n -- ^ Builder
-- | Execute the bounded builder. If the size is a constant,
-- use @Arithmetic.Nat.constant@ as the first argument to let
-- GHC conjure up this value for you.
run ::
Arithmetic.Nat n
-> Builder n -- ^ Builder
-> ByteArray
{-# inline run #-}
run b = runST $ do
arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
len <- pasteST b arr 0
run n b = runST $ do
arr <- newByteArray (Nat.demote n)
len <- Unsafe.pasteST b arr 0
shrinkMutableByteArray arr len
unsafeFreezeByteArray arr
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
{-# inline pasteST #-}
pasteST (Builder f) (MutableByteArray arr) (I# off) =
ST $ \s0 -> case f arr off s0 of
(# s1, r #) -> (# s1, (I# r) #)
-- | Paste the builder into the byte array starting at offset zero.
-- This reallocates the byte array if it cannot accomodate the builder,
-- growing it by the minimum amount necessary.
pasteGrowST :: forall n s. KnownNat n
=> Builder n
pasteGrowST ::
Arithmetic.Nat n
-> Builder n
-> MutableByteArrayOffset s
-- ^ Initial buffer, used linearly. Do not reuse this argument.
-> ST s (MutableByteArrayOffset s)
-- ^ Final buffer that accomodated the builder.
{-# inline pasteGrowST #-}
pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
sz0 <- PM.getSizeofMutableByteArray arr0
let req = fromIntegral (natVal' (proxy# :: Proxy# n))
let req = Nat.demote n
let sz1 = off0 + req
if sz1 <= sz0
then do
off1 <- pasteST b arr0 off0
off1 <- Unsafe.pasteST b arr0 off0
pure (MutableByteArrayOffset arr0 off1)
else do
arr1 <- PM.resizeMutableByteArray arr0 sz1
off1 <- pasteST b arr1 off0
off1 <- Unsafe.pasteST b arr1 off0
pure (MutableByteArrayOffset arr1 off1)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
{-# inline pasteIO #-}
pasteIO b m off = stToIO (pasteST b m off)
-- | Constructor for 'Builder' that works on a function with lifted
-- arguments instead of unlifted ones. This is just as unsafe as the
-- actual constructor.
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
{-# inline construct #-}
construct f = Builder
$ \arr off s0 ->
case unST (f (MutableByteArray arr) (I# off)) s0 of
(# s1, (I# n) #) -> (# s1, n #)
-- | The monoidal unit of `append`
empty :: Builder 0
empty = Builder $ \_ off0 s0 -> (# s0, off0 #)
infixr 9 `append`
-- | Concatenate two builders.
append :: Builder n -> Builder m -> Builder (n + m)
append :: Builder m -> Builder n -> Builder (m + n)
append (Builder f) (Builder g) =
Builder $ \arr off0 s0 -> case f arr off0 s0 of
(# s1, r #) -> g arr r s1
-- | Weaken the bound on the maximum number of bytes required. For example,
-- to use two builders with unequal bounds in a disjunctive setting:
--
-- > import qualified Arithmetic.Lte as Lte
-- >
-- > buildNumber :: Either Double Word64 -> Builder 32
-- > buildNumber = \case
-- > Left d -> doubleDec d
-- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w)
weaken :: forall m n. (m <= n) -> Builder m -> Builder n
weaken !_ (Builder f) = Builder f
-- | Replace the upper bound on size with an equal number.
substitute :: forall m n. (m :=: n) -> Builder m -> Builder n
substitute !_ (Builder f) = Builder f
-- | Encode a double-floating-point number, using decimal notation or
-- scientific notation depending on the magnitude. This has undefined
-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
@ -164,7 +162,7 @@ int64Dec (I64# w) = int64Dec# w
-- the word. This is only used internally.
wordCommonDec# :: Word# -> Builder n
{-# noinline wordCommonDec# #-}
wordCommonDec# w# = construct $ \arr off0 -> if w /= 0
wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
then internalWordLoop arr off0 (W# w#)
else do
writeByteArray arr off0 (c2w '0')
@ -187,7 +185,7 @@ internalWordLoop arr off0 x0 = go off0 x0 where
-- | Requires up to 19 bytes.
int64Dec# :: Int# -> Builder 20
{-# noinline int64Dec# #-}
int64Dec# w# = construct $ \arr off0 -> case compare w 0 of
int64Dec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of
GT -> internalWordLoop arr off0 (fromIntegral w)
EQ -> do
writeByteArray arr off0 (c2w '0')
@ -243,7 +241,7 @@ word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# w
-- might not be. Benchmark this.
word64PaddedUpperHex# :: Word# -> Builder 16
{-# noinline word64PaddedUpperHex# #-}
word64PaddedUpperHex# w# = construct $ \arr off -> do
word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
@ -266,7 +264,7 @@ word64PaddedUpperHex# w# = construct $ \arr off -> do
word32PaddedUpperHex# :: Word# -> Builder 8
{-# noinline word32PaddedUpperHex# #-}
word32PaddedUpperHex# w# = construct $ \arr off -> do
word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 28))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20))
@ -283,7 +281,7 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
-- GHC make the decision. Open an issue on github if this is
-- a problem.
word16PaddedUpperHex# :: Word# -> Builder 4
word16PaddedUpperHex# w# = construct $ \arr off -> do
word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 12))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4))
@ -295,17 +293,87 @@ word16PaddedUpperHex# w# = construct $ \arr off -> do
-- Definitely want this to inline. It's maybe a dozen instructions total.
word8PaddedUpperHex# :: Word# -> Builder 2
{-# inline word8PaddedUpperHex #-}
word8PaddedUpperHex# w# = construct $ \arr off -> do
word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 4))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0))
pure (off + 2)
where
w = W# w#
-- | Encode an ASCII char.
-- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder 1
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
-- | Encode a character as UTF-8. This only uses as much space as is required.
char :: Char -> Builder 4
char c
| codepoint < 0x80 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 codepoint)
pure (off + 1)
| codepoint < 0x800 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
return (off + 2)
| codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do
-- Codepoint U+FFFD
writeByteArray arr off (0xEF :: Word8)
writeByteArray arr (off + 1) (0xBF :: Word8)
writeByteArray arr (off + 2) (0xBD :: Word8)
return (off + 3)
| codepoint < 0x10000 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
return (off + 3)
| otherwise = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint))
return (off + 4)
where
codepoint :: Word
codepoint = fromIntegral (ord c)
unsafeWordToWord8 :: Word -> Word8
unsafeWordToWord8 (W# w) = W8# w
-- precondition: codepoint is less than 0x800
byteTwoOne :: Word -> Word
byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000
byteTwoTwo :: Word -> Word
byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000
-- precondition: codepoint is less than 0x1000
byteThreeOne :: Word -> Word
byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000
byteThreeTwo :: Word -> Word
byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
byteThreeThree :: Word -> Word
byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000
-- precondition: codepoint is less than 0x110000
byteFourOne :: Word -> Word
byteFourOne w = unsafeShiftR w 18 .|. 0b11110000
byteFourTwo :: Word -> Word
byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000
byteFourThree :: Word -> Word
byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
byteFourFour :: Word -> Word
byteFourFour w = (0b00111111 .&. w) .|. 0b10000000
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit
-- word in a big-endian fashion.
word64BE :: Word64 -> Builder 8
word64BE w = construct $ \arr off -> do
word64BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
@ -319,7 +387,7 @@ word64BE w = construct $ \arr off -> do
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit
-- word in a big-endian fashion.
word32BE :: Word32 -> Builder 4
word32BE w = construct $ \arr off -> do
word32BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
@ -329,13 +397,13 @@ word32BE w = construct $ \arr off -> do
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit
-- word in a big-endian fashion.
word16BE :: Word16 -> Builder 2
word16BE w = construct $ \arr off -> do
word16BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
pure (off + 2)
word8 :: Word8 -> Builder 1
word8 w = construct $ \arr off -> do
word8 w = Unsafe.construct $ \arr off -> do
writeByteArray arr off w
pure (off + 1)
@ -356,9 +424,6 @@ reverseBytes arr begin end = go begin end where
c2w :: Char -> Word8
c2w = fromIntegral . ord
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
primitive_ (shrinkMutableByteArray# arr sz)

View file

@ -0,0 +1,59 @@
{-# language DataKinds #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Bounded.Unsafe
( -- * Types
Builder(..)
-- * Construct
, construct
-- * Run
, pasteST
, pasteIO
) where
import GHC.TypeLits (Nat)
import Data.Kind (Type)
import GHC.IO (stToIO)
import GHC.ST (ST(ST))
import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#)
import Data.Primitive (MutableByteArray(..))
-- | A builder parameterized by the maximum number of bytes it uses
-- when executed.
newtype Builder :: Nat -> Type where
Builder ::
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
-> Builder n
-- | Constructor for 'Builder' that works on a function with lifted
-- arguments instead of unlifted ones. This is just as unsafe as the
-- actual constructor.
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
{-# inline construct #-}
construct f = Builder
$ \arr off s0 ->
case unST (f (MutableByteArray arr) (I# off)) s0 of
(# s1, (I# n) #) -> (# s1, n #)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
{-# inline pasteST #-}
pasteST (Builder f) (MutableByteArray arr) (I# off) =
ST $ \s0 -> case f arr off s0 of
(# s1, r #) -> (# s1, (I# r) #)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
{-# inline pasteIO #-}
pasteIO b m off = stToIO (pasteST b m off)
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f

View file

@ -0,0 +1,94 @@
{-# language BangPatterns #-}
{-# language DuplicateRecordFields #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Unsafe
( -- * Types
Builder(..)
-- * Safe Functions
-- | These functions are actually completely safe, but they are defined
-- here because they are used by typeclass instances. Import them from
-- @Data.ByteArray.Builder@ instead.
, stringUtf8
, cstring
) where
import Data.Primitive (MutableByteArray(MutableByteArray))
import Foreign.C.String (CString)
import GHC.Exts ((-#),(+#),(/=#),(>#))
import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
import GHC.ST (ST(ST))
import GHC.Base (unpackCString#,unpackCStringUtf8#)
import qualified GHC.Exts as Exts
import qualified Data.ByteArray.Builder.Bounded as Bounded
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
-- | An unmaterialized sequence of bytes that may be pasted
-- into a mutable byte array.
newtype Builder = Builder
-- This functions takes an offset and a number of remaining bytes
-- and returns the new offset.
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
instance IsString Builder where
{-# inline fromString #-}
fromString = stringUtf8
instance Semigroup Builder where
{-# inline (<>) #-}
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
(# s1, r #) -> case r /=# (-1#) of
1# -> g arr r (len0 +# (off0 -# r)) s1
_ -> (# s1, (-1#) #)
instance Monoid Builder where
{-# inline mempty #-}
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
-- | Create a builder from a cons-list of 'Char'. These
-- are be UTF-8 encoded.
stringUtf8 :: String -> Builder
{-# inline stringUtf8 #-}
stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
-- textual encoding, copying bytes until @NUL@ is reached.
cstring :: CString -> Builder
{-# inline cstring #-}
cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
{-# noinline goString #-}
goString [] _ off0 _ s0 = (# s0, off0 #)
goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
(# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
_ -> (# s0, (-1#) #)
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
-- since GHC uses a different function based on whether or not non-ASCII
-- codepoints are used in the string.
{-# RULES
"Builder stringUtf8/cstring" forall s a b c d.
goString (unpackCString# s) a b c d = goCString s a b c d
"Builder stringUtf8/cstring-utf8" forall s a b c d.
goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
#-}
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
0## -> (# s0, off0 #)
w -> case len0 of
0# -> (# s0, (-1#) #)
_ -> case Exts.writeWord8Array# buf off0 w s0 of
s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f

View file

@ -5,24 +5,27 @@
import Control.Monad.ST (runST)
import Data.Bytes.Types (MutableBytes(..))
import Data.ByteArray.Builder.Small
import Data.ByteArray.Builder
import Data.Word
import Data.Char (ord)
import Data.Primitive (ByteArray)
import Debug.Trace
import Test.Tasty (defaultMain,testGroup,TestTree)
import Test.QuickCheck ((===))
import Text.Printf (printf)
import Test.Tasty.HUnit ((@=?))
import qualified Arithmetic.Nat as Nat
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Builder as BB
import qualified Data.Primitive as PM
import qualified Data.List as L
import qualified Data.Vector as V
import qualified Test.Tasty.QuickCheck as TQC
import qualified Test.QuickCheck as QC
import qualified GHC.Exts as Exts
import qualified Data.ByteString.Lazy.Char8 as LB
import qualified Data.List as L
import qualified Data.Primitive as PM
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V
import qualified GHC.Exts as Exts
import qualified Test.Tasty.HUnit as THU
import qualified Test.Tasty.QuickCheck as TQC
import qualified HexWord64
@ -54,6 +57,9 @@ tests = testGroup "Tests"
(runArray word64Dec (V.fromList xs))
===
pack (foldMap show xs)
, THU.testCase "stringUtf8" $
packUtf8 "¿Cómo estás? I am doing well." @=?
run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
, THU.testCase "doubleDec-A" $
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
, THU.testCase "doubleDec-B" $
@ -88,8 +94,8 @@ tests = testGroup "Tests"
, testGroup "alternate"
[ TQC.testProperty "HexWord64" $ \x y ->
run 1
( fromUnsafe (HexWord64.word64PaddedUpperHex x)
<> fromUnsafe (HexWord64.word64PaddedUpperHex y)
( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
<> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
)
===
pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)
@ -99,6 +105,9 @@ tests = testGroup "Tests"
pack :: String -> ByteArray
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
packUtf8 :: String -> ByteArray
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
-- This is used to test pasteArrayST
runArray ::
(a -> Builder) -- ^ Builder