Remove the word "Small" from module names. Move unsafe functions into modules suffixed with "Unsafe". Write a better IsString instance. Depend on natural-arithmetic.

This commit is contained in:
Andrew Martin 2019-09-03 14:43:58 -04:00
parent 2eb37dfb7c
commit 2d1ea68261
9 changed files with 247 additions and 137 deletions

View file

@ -2,7 +2,7 @@ import Gauge (bgroup,bench,whnf)
import Gauge.Main (defaultMain) import Gauge.Main (defaultMain)
import Data.Word (Word64) import Data.Word (Word64)
import Data.Primitive (ByteArray) import Data.Primitive (ByteArray)
import qualified Data.ByteArray.Builder.Small.Unsafe as U import qualified Data.ByteArray.Builder.Bounded as U
import qualified HexWord64 import qualified HexWord64

5
cabal.project Normal file
View file

@ -0,0 +1,5 @@
packages: .
source-repository-package
type: git
location: https://github.com/andrewthad/natural-arithmetic
tag: 68868c96b58ddaf71bb865b247d2c14c3668f4c2

View file

@ -17,7 +17,7 @@ module HexWord64
import GHC.ST (ST(ST)) import GHC.ST (ST(ST))
import Data.Bits import Data.Bits
import Data.ByteArray.Builder.Small.Unsafe (Builder,construct) import Data.ByteArray.Builder.Bounded.Unsafe (Builder,construct)
import Data.Primitive import Data.Primitive
import Data.Word import Data.Word
import GHC.Exts import GHC.Exts

View file

@ -36,8 +36,10 @@ flag checked
library library
exposed-modules: exposed-modules:
Data.ByteArray.Builder.Small Data.ByteArray.Builder
Data.ByteArray.Builder.Small.Bounded Data.ByteArray.Builder.Unsafe
Data.ByteArray.Builder.Bounded
Data.ByteArray.Builder.Bounded.Unsafe
build-depends: build-depends:
, base >=4.12.0.0 && <5 , base >=4.12.0.0 && <5
, byteslice >=0.1 && <0.2 , byteslice >=0.1 && <0.2
@ -46,6 +48,7 @@ library
, vector >=0.12.0.3 && <0.13 , vector >=0.12.0.3 && <0.13
, bytestring >=0.10.8.2 && <0.11 , bytestring >=0.10.8.2 && <0.11
, text-short >=0.1.3 && <0.2 , text-short >=0.1.3 && <0.2
, natural-arithmetic >=0.1 && <0.2
if flag(checked) if flag(checked)
build-depends: primitive-checked >= 0.7 && <0.8 build-depends: primitive-checked >= 0.7 && <0.8
else else
@ -59,6 +62,7 @@ test-suite test
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
hs-source-dirs: test, common hs-source-dirs: test, common
main-is: Main.hs main-is: Main.hs
ghc-options: -O2 -Wall
other-modules: other-modules:
HexWord64 HexWord64
build-depends: build-depends:
@ -70,6 +74,7 @@ test-suite test
, tasty-quickcheck >=0.10.1 && <0.11 , tasty-quickcheck >=0.10.1 && <0.11
, tasty-hunit >=0.10.0.2 && <0.11 , tasty-hunit >=0.10.0.2 && <0.11
, tasty >=1.2.3 && <1.3 , tasty >=1.2.3 && <1.3
, text >=1.2 && <1.3
, primitive , primitive
, vector , vector

View file

@ -6,7 +6,7 @@
{-# language ScopedTypeVariables #-} {-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-} {-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Small module Data.ByteArray.Builder
( -- * Bounded Primitives ( -- * Bounded Primitives
Builder(..) Builder(..)
, construct , construct
@ -24,6 +24,8 @@ module Data.ByteArray.Builder.Small
, bytearray , bytearray
, shortTextUtf8 , shortTextUtf8
, shortTextJsonString , shortTextJsonString
, cstring
, stringUtf8
-- * Encode Integral Types -- * Encode Integral Types
-- ** Human-Readable -- ** Human-Readable
, word64Dec , word64Dec
@ -60,32 +62,15 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import Data.ByteString.Short.Internal (ShortByteString(SBS)) import Data.ByteString.Short.Internal (ShortByteString(SBS))
import Data.Text.Short (ShortText) import Data.Text.Short (ShortText)
import Data.Char (ord) import Data.Char (ord)
import Data.ByteArray.Builder.Unsafe (Builder(Builder))
import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
import qualified GHC.Exts as Exts import qualified GHC.Exts as Exts
import qualified Data.Text.Short as TS import qualified Data.Text.Short as TS
import qualified Data.Primitive as PM import qualified Data.Primitive as PM
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.ByteArray.Builder.Small.Bounded as Bounded import qualified Data.ByteArray.Builder.Bounded as Bounded
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
-- | An unmaterialized sequence of bytes that may be pasted
-- into a mutable byte array.
newtype Builder = Builder
-- This functions takes an offset and a number of remaining bytes
-- and returns the new offset.
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
instance Semigroup Builder where
{-# inline (<>) #-}
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
(# s1, r #) -> case r /=# (-1#) of
1# -> g arr r (len0 +# (off0 -# r)) s1
_ -> (# s1, (-1#) #)
instance Monoid Builder where
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
instance IsString Builder where
fromString = shortTextUtf8 . TS.fromString
-- | Run a builder. An accurate size hint is important for good performance. -- | Run a builder. An accurate size hint is important for good performance.
-- The size hint should be slightly larger than the actual size. -- The size hint should be slightly larger than the actual size.
@ -101,7 +86,7 @@ run hint b = runByteArrayST $ do
Just len -> do Just len -> do
shrinkMutableByteArray arr len shrinkMutableByteArray arr len
unsafeFreezeByteArray arr unsafeFreezeByteArray arr
go hint go (max hint 1)
-- | Variant of 'pasteArrayST' that runs in 'IO'. -- | Variant of 'pasteArrayST' that runs in 'IO'.
pasteArrayIO :: pasteArrayIO ::
@ -186,9 +171,10 @@ construct f = Builder
Nothing -> (# s1, (-1#) #) Nothing -> (# s1, (-1#) #)
Just (I# n) -> (# s1, n #) Just (I# n) -> (# s1, n #)
-- | Convert a bounded builder to an unbounded one.
fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder
{-# inline fromBounded #-} {-# inline fromBounded #-}
fromBounded (Bounded.Builder f) = Builder $ \arr off len s0 -> fromBounded (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
case fromIntegral (natVal' (proxy# :: Proxy# n)) of case fromIntegral (natVal' (proxy# :: Proxy# n)) of
I# req -> case len >=# req of I# req -> case len >=# req of
1# -> f arr off s0 1# -> f arr off s0
@ -226,7 +212,7 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
else do else do
write2 dst doff '\\' 'u' write2 dst doff '\\' 'u'
doff' <- Bounded.pasteST doff' <- UnsafeBounded.pasteST
(Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) (Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
dst (doff + 2) dst (doff + 2)
go (soff + 1) (slen - 1) doff' go (soff + 1) (slen - 1) doff'

View file

@ -1,4 +1,3 @@
{-# language GADTs #-}
{-# language KindSignatures #-} {-# language KindSignatures #-}
{-# language ScopedTypeVariables #-} {-# language ScopedTypeVariables #-}
{-# language BangPatterns #-} {-# language BangPatterns #-}
@ -12,23 +11,18 @@
{-# language TypeApplications #-} {-# language TypeApplications #-}
-- | The functions in this module are explict in the amount of bytes they require. -- | The functions in this module are explict in the amount of bytes they require.
module Data.ByteArray.Builder.Small.Bounded module Data.ByteArray.Builder.Bounded
( -- * Builder ( -- * Builder
Builder(..) Builder
, construct
-- * Execute -- * Execute
, run , run
, pasteST
, pasteGrowST , pasteGrowST
, pasteIO
-- * Combine -- * Combine
, empty , empty
, append , append
-- * Bounds Manipulation -- * Bounds Manipulation
, (<=) , weaken
, lessThanEqual , substitute
, isLessThanEqual
, raise
-- * Encode Integral Types -- * Encode Integral Types
-- ** Human-Readable -- ** Human-Readable
, word64Dec , word64Dec
@ -56,46 +50,33 @@ import Data.Bits
import Data.Char (ord) import Data.Char (ord)
import Data.Primitive import Data.Primitive
import GHC.Exts import GHC.Exts
import GHC.ST import GHC.ST (ST(ST))
import GHC.Word import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
import GHC.Int import GHC.Int (Int64(I64#))
import Data.Kind import GHC.TypeLits (KnownNat,type (+),natVal')
import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
import qualified GHC.TypeLits as GHC
import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
import qualified Control.Category as Cat import Arithmetic.Types (type (<=), type (:=:))
import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe
import qualified Data.Primitive as PM import qualified Data.Primitive as PM
-- | A builder parameterized by the maximum number of bytes it uses -- Used internally.
-- when executed.
newtype Builder :: Nat -> Type where
Builder ::
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
-> Builder n
knownNat :: KnownNat n => Proxy# n -> Int knownNat :: KnownNat n => Proxy# n -> Int
{-# inline knownNat #-}
knownNat p = fromIntegral (natVal' p) knownNat p = fromIntegral (natVal' p)
-- | Execute the builder. This function is safe. -- | Execute the bounded builder.
run :: forall n. KnownNat n run :: forall n. KnownNat n
=> Builder n -- ^ Builder => Builder n -- ^ Builder
-> ByteArray -> ByteArray
{-# inline run #-} {-# inline run #-}
run b = runST $ do run b = runST $ do
arr <- newByteArray (knownNat (proxy# :: Proxy# n)) arr <- newByteArray (knownNat (proxy# :: Proxy# n))
len <- pasteST b arr 0 len <- Unsafe.pasteST b arr 0
shrinkMutableByteArray arr len shrinkMutableByteArray arr len
unsafeFreezeByteArray arr unsafeFreezeByteArray arr
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
{-# inline pasteST #-}
pasteST (Builder f) (MutableByteArray arr) (I# off) =
ST $ \s0 -> case f arr off s0 of
(# s1, r #) -> (# s1, (I# r) #)
-- | Paste the builder into the byte array starting at offset zero. -- | Paste the builder into the byte array starting at offset zero.
-- This reallocates the byte array if it cannot accomodate the builder, -- This reallocates the byte array if it cannot accomodate the builder,
-- growing it by the minimum amount necessary. -- growing it by the minimum amount necessary.
@ -112,67 +93,40 @@ pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
let sz1 = off0 + req let sz1 = off0 + req
if sz1 <= sz0 if sz1 <= sz0
then do then do
off1 <- pasteST b arr0 off0 off1 <- Unsafe.pasteST b arr0 off0
pure (MutableByteArrayOffset arr0 off1) pure (MutableByteArrayOffset arr0 off1)
else do else do
arr1 <- PM.resizeMutableByteArray arr0 sz1 arr1 <- PM.resizeMutableByteArray arr0 sz1
off1 <- pasteST b arr1 off0 off1 <- Unsafe.pasteST b arr1 off0
pure (MutableByteArrayOffset arr1 off1) pure (MutableByteArrayOffset arr1 off1)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
{-# inline pasteIO #-}
pasteIO b m off = stToIO (pasteST b m off)
-- | Constructor for 'Builder' that works on a function with lifted
-- arguments instead of unlifted ones. This is just as unsafe as the
-- actual constructor.
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
{-# inline construct #-}
construct f = Builder
$ \arr off s0 ->
case unST (f (MutableByteArray arr) (I# off)) s0 of
(# s1, (I# n) #) -> (# s1, n #)
infixr 9 `append`
-- | The monoidal unit of `append` -- | The monoidal unit of `append`
empty :: Builder 0 empty :: Builder 0
empty = Builder $ \_ off0 s0 -> (# s0, off0 #) empty = Builder $ \_ off0 s0 -> (# s0, off0 #)
infixr 9 `append`
-- | Concatenate two builders. -- | Concatenate two builders.
append :: Builder n -> Builder m -> Builder (n + m) append :: Builder m -> Builder n -> Builder (m + n)
append (Builder f) (Builder g) = append (Builder f) (Builder g) =
Builder $ \arr off0 s0 -> case f arr off0 s0 of Builder $ \arr off0 s0 -> case f arr off0 s0 of
(# s1, r #) -> g arr r s1 (# s1, r #) -> g arr r s1
-- | A proof that n is less than or equal to m -- | Weaken the bound on the maximum number of bytes required. For example,
newtype (n :: Nat) <= (m :: Nat) = LessThanEqual Int -- m - n -- to use two builders with unequal bounds in a disjunctive setting:
--
-- > import qualified Arithmetic.Lte as Lte
-- >
-- > buildNumber :: Either Double Word64 -> Builder 32
-- > buildNumber = \case
-- > Left d -> doubleDec d
-- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w)
weaken :: forall m n. (m <= n) -> Builder m -> Builder n
weaken !_ (Builder f) = Builder f
instance Cat.Category (<=) where -- | Replace the upper bound on size with an equal number.
id = LessThanEqual 0 substitute :: forall m n. (m :=: n) -> Builder m -> Builder n
-- b <= c (c - b) -> a <= b (b - a) -> a <= c (c - a) substitute !_ (Builder f) = Builder f
LessThanEqual cb . LessThanEqual ba = LessThanEqual (cb + ba)
-- | Dynamically check than 'n' is less than or equal to 'm'
isLessThanEqual :: (KnownNat n, KnownNat m) => Proxy# n -> Proxy# m -> Maybe (n <= m)
isLessThanEqual n m = if 0 <= diff then Just (LessThanEqual diff) else Nothing
where diff = knownNat m - knownNat n
-- | Statically check than 'n' is less than or equal to 'm'. 'n' and 'm' must be known at compile time.
lessThanEqual :: forall n m. (KnownNat n, KnownNat m, n GHC.<= m) => n <= m
lessThanEqual = LessThanEqual (knownNat (proxy# :: Proxy# m) - knownNat (proxy# :: Proxy# n))
-- | Weaken the bound on the maximum number of bytes required.
-- >>> :{
-- buildNumber :: Either Double Word64 -> Builder 32
-- buildNumber = \case
-- Left d -> doubleDec d
-- Right w -> raise lessThanEqual (word64Dec w)
-- :}
raise :: forall m n. n <= m -> Builder n -> Builder m
raise !_ (Builder f) = Builder f
-- | Encode a double-floating-point number, using decimal notation or -- | Encode a double-floating-point number, using decimal notation or
-- scientific notation depending on the magnitude. This has undefined -- scientific notation depending on the magnitude. This has undefined
@ -207,7 +161,7 @@ int64Dec (I64# w) = int64Dec# w
-- the word. This is only used internally. -- the word. This is only used internally.
wordCommonDec# :: Word# -> Builder n wordCommonDec# :: Word# -> Builder n
{-# noinline wordCommonDec# #-} {-# noinline wordCommonDec# #-}
wordCommonDec# w# = construct $ \arr off0 -> if w /= 0 wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
then internalWordLoop arr off0 (W# w#) then internalWordLoop arr off0 (W# w#)
else do else do
writeByteArray arr off0 (c2w '0') writeByteArray arr off0 (c2w '0')
@ -230,7 +184,7 @@ internalWordLoop arr off0 x0 = go off0 x0 where
-- | Requires up to 19 bytes. -- | Requires up to 19 bytes.
int64Dec# :: Int# -> Builder 20 int64Dec# :: Int# -> Builder 20
{-# noinline int64Dec# #-} {-# noinline int64Dec# #-}
int64Dec# w# = construct $ \arr off0 -> case compare w 0 of int64Dec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of
GT -> internalWordLoop arr off0 (fromIntegral w) GT -> internalWordLoop arr off0 (fromIntegral w)
EQ -> do EQ -> do
writeByteArray arr off0 (c2w '0') writeByteArray arr off0 (c2w '0')
@ -286,7 +240,7 @@ word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# w
-- might not be. Benchmark this. -- might not be. Benchmark this.
word64PaddedUpperHex# :: Word# -> Builder 16 word64PaddedUpperHex# :: Word# -> Builder 16
{-# noinline word64PaddedUpperHex# #-} {-# noinline word64PaddedUpperHex# #-}
word64PaddedUpperHex# w# = construct $ \arr off -> do word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 60)) writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
@ -309,7 +263,7 @@ word64PaddedUpperHex# w# = construct $ \arr off -> do
word32PaddedUpperHex# :: Word# -> Builder 8 word32PaddedUpperHex# :: Word# -> Builder 8
{-# noinline word32PaddedUpperHex# #-} {-# noinline word32PaddedUpperHex# #-}
word32PaddedUpperHex# w# = construct $ \arr off -> do word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 28)) writeByteArray arr off (toHexUpper (unsafeShiftR w 28))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20))
@ -326,7 +280,7 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
-- GHC make the decision. Open an issue on github if this is -- GHC make the decision. Open an issue on github if this is
-- a problem. -- a problem.
word16PaddedUpperHex# :: Word# -> Builder 4 word16PaddedUpperHex# :: Word# -> Builder 4
word16PaddedUpperHex# w# = construct $ \arr off -> do word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 12)) writeByteArray arr off (toHexUpper (unsafeShiftR w 12))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8))
writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4))
@ -338,7 +292,7 @@ word16PaddedUpperHex# w# = construct $ \arr off -> do
-- Definitely want this to inline. It's maybe a dozen instructions total. -- Definitely want this to inline. It's maybe a dozen instructions total.
word8PaddedUpperHex# :: Word# -> Builder 2 word8PaddedUpperHex# :: Word# -> Builder 2
{-# inline word8PaddedUpperHex #-} {-# inline word8PaddedUpperHex #-}
word8PaddedUpperHex# w# = construct $ \arr off -> do word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
writeByteArray arr off (toHexUpper (unsafeShiftR w 4)) writeByteArray arr off (toHexUpper (unsafeShiftR w 4))
writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0))
pure (off + 2) pure (off + 2)
@ -350,28 +304,28 @@ word8PaddedUpperHex# w# = construct $ \arr off -> do
ascii :: Char -> Builder 1 ascii :: Char -> Builder 1
ascii c = word8 (fromIntegral @Int @Word8 (ord c)) ascii c = word8 (fromIntegral @Int @Word8 (ord c))
-- | Encode an UTF8 char. This only uses as much space as is required. -- | Encode a character as UTF-8. This only uses as much space as is required.
char :: Char -> Builder 4 char :: Char -> Builder 4
char c char c
| codepoint < 0x80 = construct $ \arr off -> do | codepoint < 0x80 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 codepoint) writeByteArray arr off (unsafeWordToWord8 codepoint)
pure (off + 1) pure (off + 1)
| codepoint < 0x800 = construct $ \arr off -> do | codepoint < 0x800 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
return (off + 2) return (off + 2)
| codepoint >= 0xD800 && codepoint < 0xE000 = construct $ \arr off -> do | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do
-- Codepoint U+FFFD -- Codepoint U+FFFD
writeByteArray arr off (0xEF :: Word8) writeByteArray arr off (0xEF :: Word8)
writeByteArray arr (off + 1) (0xBF :: Word8) writeByteArray arr (off + 1) (0xBF :: Word8)
writeByteArray arr (off + 2) (0xBD :: Word8) writeByteArray arr (off + 2) (0xBD :: Word8)
return (off + 3) return (off + 3)
| codepoint < 0x10000 = construct $ \arr off -> do | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
return (off + 3) return (off + 3)
| otherwise = construct $ \arr off -> do | otherwise = Unsafe.construct $ \arr off -> do
writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint))
writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
@ -418,7 +372,7 @@ char c
-- | Requires exactly 8 bytes. Dump the octets of a 64-bit -- | Requires exactly 8 bytes. Dump the octets of a 64-bit
-- word in a big-endian fashion. -- word in a big-endian fashion.
word64BE :: Word64 -> Builder 8 word64BE :: Word64 -> Builder 8
word64BE w = construct $ \arr off -> do word64BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
@ -432,7 +386,7 @@ word64BE w = construct $ \arr off -> do
-- | Requires exactly 4 bytes. Dump the octets of a 32-bit -- | Requires exactly 4 bytes. Dump the octets of a 32-bit
-- word in a big-endian fashion. -- word in a big-endian fashion.
word32BE :: Word32 -> Builder 4 word32BE :: Word32 -> Builder 4
word32BE w = construct $ \arr off -> do word32BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
@ -442,13 +396,13 @@ word32BE w = construct $ \arr off -> do
-- | Requires exactly 2 bytes. Dump the octets of a 16-bit -- | Requires exactly 2 bytes. Dump the octets of a 16-bit
-- word in a big-endian fashion. -- word in a big-endian fashion.
word16BE :: Word16 -> Builder 2 word16BE :: Word16 -> Builder 2
word16BE w = construct $ \arr off -> do word16BE w = Unsafe.construct $ \arr off -> do
writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w) writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
pure (off + 2) pure (off + 2)
word8 :: Word8 -> Builder 1 word8 :: Word8 -> Builder 1
word8 w = construct $ \arr off -> do word8 w = Unsafe.construct $ \arr off -> do
writeByteArray arr off w writeByteArray arr off w
pure (off + 1) pure (off + 1)
@ -469,9 +423,6 @@ reverseBytes arr begin end = go begin end where
c2w :: Char -> Word8 c2w :: Char -> Word8
c2w = fromIntegral . ord c2w = fromIntegral . ord
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f
shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
shrinkMutableByteArray (MutableByteArray arr) (I# sz) = shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
primitive_ (shrinkMutableByteArray# arr sz) primitive_ (shrinkMutableByteArray# arr sz)

View file

@ -0,0 +1,59 @@
{-# language DataKinds #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Bounded.Unsafe
( -- * Types
Builder(..)
-- * Construct
, construct
-- * Run
, pasteST
, pasteIO
) where
import GHC.TypeLits (Nat)
import Data.Kind (Type)
import GHC.IO (stToIO)
import GHC.ST (ST(ST))
import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#)
import Data.Primitive (MutableByteArray(..))
-- | A builder parameterized by the maximum number of bytes it uses
-- when executed.
newtype Builder :: Nat -> Type where
Builder ::
(forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
-> Builder n
-- | Constructor for 'Builder' that works on a function with lifted
-- arguments instead of unlifted ones. This is just as unsafe as the
-- actual constructor.
construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
{-# inline construct #-}
construct f = Builder
$ \arr off s0 ->
case unST (f (MutableByteArray arr) (I# off)) s0 of
(# s1, (I# n) #) -> (# s1, n #)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
{-# inline pasteST #-}
pasteST (Builder f) (MutableByteArray arr) (I# off) =
ST $ \s0 -> case f arr off s0 of
(# s1, r #) -> (# s1, (I# r) #)
-- | This function does not enforce the known upper bound on the
-- size. It is up to the user to do this.
pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
{-# inline pasteIO #-}
pasteIO b m off = stToIO (pasteST b m off)
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f

View file

@ -0,0 +1,94 @@
{-# language BangPatterns #-}
{-# language DuplicateRecordFields #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Data.ByteArray.Builder.Unsafe
( -- * Types
Builder(..)
-- * Safe Functions
-- | These functions are actually completely safe, but they are defined
-- here because they are used by typeclass instances. Import them from
-- @Data.ByteArray.Builder@ instead.
, stringUtf8
, cstring
) where
import Data.Primitive (MutableByteArray(MutableByteArray))
import Foreign.C.String (CString)
import GHC.Exts ((-#),(+#),(/=#),(>#))
import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
import GHC.ST (ST(ST))
import GHC.Base (unpackCString#,unpackCStringUtf8#)
import qualified GHC.Exts as Exts
import qualified Data.ByteArray.Builder.Bounded as Bounded
import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
-- | An unmaterialized sequence of bytes that may be pasted
-- into a mutable byte array.
newtype Builder = Builder
-- This functions takes an offset and a number of remaining bytes
-- and returns the new offset.
(forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
instance IsString Builder where
{-# inline fromString #-}
fromString = stringUtf8
instance Semigroup Builder where
{-# inline (<>) #-}
Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
(# s1, r #) -> case r /=# (-1#) of
1# -> g arr r (len0 +# (off0 -# r)) s1
_ -> (# s1, (-1#) #)
instance Monoid Builder where
{-# inline mempty #-}
mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
-- | Create a builder from a cons-list of 'Char'. These
-- are be UTF-8 encoded.
stringUtf8 :: String -> Builder
{-# inline stringUtf8 #-}
stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
-- textual encoding, copying bytes until @NUL@ is reached.
cstring :: CString -> Builder
{-# inline cstring #-}
cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
{-# noinline goString #-}
goString [] _ off0 _ s0 = (# s0, off0 #)
goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
(# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
_ -> (# s0, (-1#) #)
-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
-- since GHC uses a different function based on whether or not non-ASCII
-- codepoints are used in the string.
{-# RULES
"Builder stringUtf8/cstring" forall s a b c d.
goString (unpackCString# s) a b c d = goCString s a b c d
"Builder stringUtf8/cstring-utf8" forall s a b c d.
goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
#-}
goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
0## -> (# s0, off0 #)
w -> case len0 of
0# -> (# s0, (-1#) #)
_ -> case Exts.writeWord8Array# buf off0 w s0 of
s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
unST :: ST s a -> State# s -> (# State# s, a #)
unST (ST f) = f

View file

@ -5,7 +5,7 @@
import Control.Monad.ST (runST) import Control.Monad.ST (runST)
import Data.Bytes.Types (MutableBytes(..)) import Data.Bytes.Types (MutableBytes(..))
import Data.ByteArray.Builder.Small import Data.ByteArray.Builder
import Data.Word import Data.Word
import Data.Char (ord) import Data.Char (ord)
import Data.Primitive (ByteArray) import Data.Primitive (ByteArray)
@ -14,15 +14,19 @@ import Test.Tasty (defaultMain,testGroup,TestTree)
import Test.QuickCheck ((===)) import Test.QuickCheck ((===))
import Text.Printf (printf) import Text.Printf (printf)
import Test.Tasty.HUnit ((@=?)) import Test.Tasty.HUnit ((@=?))
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Builder as BB
import qualified Data.Primitive as PM
import qualified Data.List as L
import qualified Data.Vector as V
import qualified Test.Tasty.QuickCheck as TQC
import qualified Test.QuickCheck as QC
import qualified GHC.Exts as Exts
import qualified Data.ByteString.Lazy.Char8 as LB import qualified Data.ByteString.Lazy.Char8 as LB
import qualified Data.List as L
import qualified Data.Primitive as PM
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V
import qualified GHC.Exts as Exts
import qualified Test.QuickCheck as QC
import qualified Test.Tasty.HUnit as THU import qualified Test.Tasty.HUnit as THU
import qualified Test.Tasty.QuickCheck as TQC
import qualified HexWord64 import qualified HexWord64
@ -54,6 +58,9 @@ tests = testGroup "Tests"
(runArray word64Dec (V.fromList xs)) (runArray word64Dec (V.fromList xs))
=== ===
pack (foldMap show xs) pack (foldMap show xs)
, THU.testCase "stringUtf8" $
packUtf8 "¿Cómo estás? I am doing well." @=?
run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
, THU.testCase "doubleDec-A" $ , THU.testCase "doubleDec-A" $
pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0) pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
, THU.testCase "doubleDec-B" $ , THU.testCase "doubleDec-B" $
@ -88,8 +95,8 @@ tests = testGroup "Tests"
, testGroup "alternate" , testGroup "alternate"
[ TQC.testProperty "HexWord64" $ \x y -> [ TQC.testProperty "HexWord64" $ \x y ->
run 1 run 1
( fromUnsafe (HexWord64.word64PaddedUpperHex x) ( fromBounded (HexWord64.word64PaddedUpperHex x)
<> fromUnsafe (HexWord64.word64PaddedUpperHex y) <> fromBounded (HexWord64.word64PaddedUpperHex y)
) )
=== ===
pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)
@ -99,6 +106,9 @@ tests = testGroup "Tests"
pack :: String -> ByteArray pack :: String -> ByteArray
pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord) pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
packUtf8 :: String -> ByteArray
packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
-- This is used to test pasteArrayST -- This is used to test pasteArrayST
runArray :: runArray ::
(a -> Builder) -- ^ Builder (a -> Builder) -- ^ Builder