Rename library to bytebuild. Use Data.Bytes namespace.

2020-02-27 13:58:00 -05:00 · 2020-02-27 13:58:00 -05:00 · 1908d53a14
commit 1908d53a14
parent e9de3f30db
11 changed files with 46 additions and 36 deletions
--- a/src/Data/Bytes/Builder.hs
+++ b/src/Data/Bytes/Builder.hs
--- a/src/Data/Bytes/Builder/Bounded.hs
+++ b/src/Data/Bytes/Builder/Bounded.hs
--- a/src/Data/Bytes/Builder/Bounded/Unsafe.hs
+++ b/src/Data/Bytes/Builder/Bounded/Unsafe.hs
@ -0,0 +1,62 @@
+{-# language DataKinds #-}
+{-# language GADTSyntax #-}
+{-# language KindSignatures #-}
+{-# language MagicHash #-}
+{-# language RankNTypes #-}
+{-# language ScopedTypeVariables #-}
+{-# language UnboxedTuples #-}
+
+module Data.Bytes.Builder.Bounded.Unsafe
+  ( -- * Types
+    Builder(..)
+    -- * Construct
+  , construct
+    -- * Run
+  , pasteST
+  , pasteIO
+  ) where
+
+import Data.Kind (Type)
+import Data.Primitive (MutableByteArray(..))
+import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#)
+import GHC.IO (stToIO)
+import GHC.ST (ST(ST))
+import GHC.TypeLits (Nat)
+
+-- | A builder parameterized by the maximum number of bytes it uses
+-- when executed.
+newtype Builder :: Nat -> Type where
+   Builder ::
+        (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) 
+        -- ^ This function takes a buffer, an offset, and a number of remaining bytes.
+        --   It returns the new offset.
+     -> Builder n
+   
+
+-- | Constructor for 'Builder' that works on a function with lifted
+-- arguments instead of unlifted ones. This is just as unsafe as the
+-- actual constructor.
+construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
+{-# inline construct #-}
+construct f = Builder
+  $ \arr off s0 ->
+    case unST (f (MutableByteArray arr) (I# off)) s0 of
+      (# s1, (I# n) #) -> (# s1, n #)
+
+-- | This function does not enforce the known upper bound on the
+-- size. It is up to the user to do this.
+pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
+{-# inline pasteST #-}
+pasteST (Builder f) (MutableByteArray arr) (I# off) =
+  ST $ \s0 -> case f arr off s0 of
+    (# s1, r #) -> (# s1, (I# r) #)
+
+-- | This function does not enforce the known upper bound on the
+-- size. It is up to the user to do this.
+pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
+{-# inline pasteIO #-}
+pasteIO b m off = stToIO (pasteST b m off)
+
+unST :: ST s a -> State# s -> (# State# s, a #)
+unST (ST f) = f
+
--- a/src/Data/Bytes/Builder/Unsafe.hs
+++ b/src/Data/Bytes/Builder/Unsafe.hs
@ -0,0 +1,260 @@
+{-# language BangPatterns #-}
+{-# language DuplicateRecordFields #-}
+{-# language LambdaCase #-}
+{-# language MagicHash #-}
+{-# language RankNTypes #-}
+{-# language ScopedTypeVariables #-}
+{-# language UnboxedTuples #-}
+
+module Data.Bytes.Builder.Unsafe
+  ( -- * Types
+    Builder(..)
+  , BuilderState(..)
+  , Commits(..)
+    -- * Execution
+  , pasteST
+  , pasteIO
+    -- * Construction
+  , fromEffect
+    -- * Finalization
+  , reverseCommitsOntoChunks
+  , commitsOntoChunks
+  , copyReverseCommits
+  , addCommitsLength
+    -- * Safe Functions
+    -- | These functions are actually completely safe, but they are defined
+    -- here because they are used by typeclass instances. Import them from
+    -- @Data.Bytes.Builder@ instead.
+  , stringUtf8
+  , cstring
+  ) where
+
+import Control.Monad.Primitive (primitive_)
+import Data.Bytes.Chunks (Chunks(ChunksCons))
+import Data.Bytes.Types (Bytes(Bytes))
+import Data.Primitive (MutableByteArray(..),ByteArray(..))
+import Foreign.C.String (CString)
+import GHC.Base (unpackCString#,unpackCStringUtf8#)
+import GHC.Exts ((-#),(+#),(>#),(>=#))
+import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr))
+import GHC.Exts (RealWorld,IsString,Int#,State#)
+import GHC.ST (ST(ST))
+import GHC.IO (stToIO)
+
+import qualified Data.Bytes.Builder.Bounded as Bounded
+import qualified Data.Bytes.Builder.Bounded.Unsafe as UnsafeBounded
+import qualified Data.Primitive as PM
+import qualified GHC.Exts as Exts
+
+-- | An unmaterialized sequence of bytes that may be pasted
+-- into a mutable byte array.
+newtype Builder
+  = Builder (forall s.
+      MutableByteArray# s ->   -- buffer we are currently writing to
+      Int# ->   -- offset into the current buffer
+      Int# ->   -- number of bytes remaining in the current buffer
+      Commits s ->   -- buffers and immutable byte slices that we have already committed
+      State# s ->
+      (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things
+    )
+
+data BuilderState s = BuilderState
+  (MutableByteArray# s) -- buffer we are currently writing to
+  Int# -- offset into the current buffer
+  Int# -- number of bytes remaining in the current buffer
+  !(Commits s) -- buffers and immutable byte slices that are already committed
+
+-- | Run a builder, performing an in-place update on the state.
+-- The @BuilderState@ argument must not be reused after being passed
+-- to this function. That is, its use must be affine.
+pasteST :: Builder -> BuilderState s -> ST s (BuilderState s)
+{-# inline pasteST #-}
+pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 ->
+  case f buf off len cmts s0 of
+    (# s1, buf1, off1, len1, cmts1 #) ->
+      (# s1, BuilderState buf1 off1 len1 cmts1 #)
+
+-- | Variant of 'pasteST' that runs in 'IO'.
+pasteIO :: Builder -> BuilderState RealWorld -> IO (BuilderState RealWorld)
+{-# inline pasteIO #-}
+pasteIO b st = stToIO (pasteST b st)
+
+instance IsString Builder where
+  {-# inline fromString #-}
+  fromString = stringUtf8
+
+instance Semigroup Builder where
+  {-# inline (<>) #-}
+  Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of
+    (# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1
+
+instance Monoid Builder where
+  {-# inline mempty #-}
+  mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #)
+
+data Commits s
+  = Mutable
+      (MutableByteArray# s)
+      -- ^ Mutable buffer, start index implicitly zero
+      Int# -- ^ Length (may be smaller than actual length)
+      !(Commits s)
+  | Immutable
+      ByteArray# -- ^ Immutable chunk
+      Int# -- ^ Offset into chunk, not necessarily zero
+      Int# -- ^ Length (may be smaller than actual length)
+      !(Commits s)
+  | Initial
+
+-- | Add the total number of bytes in the commits to first
+-- argument.
+addCommitsLength :: Int -> Commits s -> Int
+addCommitsLength !acc Initial = acc
+addCommitsLength !acc (Immutable _ _ x cs) = addCommitsLength (acc + I# x) cs
+addCommitsLength !acc (Mutable _ x cs) = addCommitsLength (acc + I# x) cs
+
+-- | Cons the chunks from a list of @Commits@ onto an initial
+-- @Chunks@ list (this argument is often @ChunksNil@). This reverses
+-- the order of the chunks, which is desirable since builders assemble
+-- @Commits@ with the chunks backwards. This performs an in-place shrink
+-- and freezes any mutable byte arrays it encounters. Consequently,
+-- these must not be reused.
+reverseCommitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
+reverseCommitsOntoChunks !xs Initial = pure xs
+reverseCommitsOntoChunks !xs (Immutable arr off len cs) =
+  reverseCommitsOntoChunks (ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs) cs
+reverseCommitsOntoChunks !xs (Mutable buf len cs) = case len of
+  -- Skip over empty byte arrays.
+  0# -> reverseCommitsOntoChunks xs cs
+  _ -> do
+    shrinkMutableByteArray (MutableByteArray buf) (I# len)
+    arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
+    reverseCommitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs
+
+-- | Variant of 'reverseCommitsOntoChunks' that does not reverse
+-- the order of the commits. Since commits are built backwards by
+-- consing, this means that the chunks appended to the front will
+-- be backwards. Within each chunk, however, the bytes will be in
+-- the correct order.
+--
+-- Unlike 'reverseCommitsOntoChunks', this function is not tail
+-- recursive.
+commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks
+commitsOntoChunks !xs0 cs0 = go cs0
+  where
+  go Initial = pure xs0
+  go (Immutable arr off len cs) = do
+    xs <- go cs
+    pure $! ChunksCons (Bytes (ByteArray arr) (I# off) (I# len)) xs
+  go (Mutable buf len cs) = case len of
+    -- Skip over empty byte arrays.
+    0# -> go cs
+    _ -> do
+      shrinkMutableByteArray (MutableByteArray buf) (I# len)
+      arr <- PM.unsafeFreezeByteArray (MutableByteArray buf)
+      xs <- go cs
+      pure $! ChunksCons (Bytes arr 0 (I# len)) xs
+
+-- | Copy the contents of the chunks into a mutable array, reversing
+-- the order of the chunks.
+-- Precondition: The destination must have enough space to house the
+-- contents. This is not checked.
+copyReverseCommits ::
+     MutableByteArray s -- ^ Destination
+  -> Int -- ^ Destination range successor
+  -> Commits s -- ^ Source
+  -> ST s Int
+{-# inline copyReverseCommits #-}
+copyReverseCommits (MutableByteArray dst) (I# off) cs = ST
+  (\s0 -> case copyReverseCommits# dst off cs s0 of
+    (# s1, nextOff #) -> (# s1, I# nextOff #)
+  )
+
+copyReverseCommits# ::
+     MutableByteArray# s
+  -> Int#
+  -> Commits s
+  -> State# s
+  -> (# State# s, Int# #)
+copyReverseCommits# _ off Initial s0 = (# s0, off #)
+copyReverseCommits# marr prevOff (Mutable arr sz cs) s0 =
+  let !off = prevOff -# sz in
+  case Exts.copyMutableByteArray# arr 0# marr off sz s0 of
+    s1 -> copyReverseCommits# marr off cs s1
+copyReverseCommits# marr prevOff (Immutable arr soff sz cs) s0 =
+  let !off = prevOff -# sz in
+  case Exts.copyByteArray# arr soff marr off sz s0 of
+    s1 -> copyReverseCommits# marr off cs s1
+
+-- | Create a builder from a cons-list of 'Char'. These
+-- are be UTF-8 encoded.
+stringUtf8 :: String -> Builder
+{-# inline stringUtf8 #-}
+stringUtf8 cs = Builder (goString cs)
+
+-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
+-- textual encoding, copying bytes until @NUL@ is reached.
+cstring :: CString -> Builder
+{-# inline cstring #-}
+cstring (Ptr cs) = Builder (goCString cs)
+
+goString :: String
+  -> MutableByteArray# s -> Int# -> Int# -> Commits s
+  -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
+{-# noinline goString #-}
+goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #)
+goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of
+  1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of
+    (# s1, I# off1 #) -> goString cs buf0 off1 (len0 -# (off1 -# off0)) cs0 s1
+  _ -> case Exts.newByteArray# 4080# s0 of
+    (# s1, buf1 #) -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf1) 0) s1 of
+      (# s2, I# off1 #) -> goString cs buf1 off1 (4080# -# off1) (Mutable buf0 off0 cs0) s2
+
+-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
+-- since GHC uses a different function based on whether or not non-ASCII
+-- codepoints are used in the string.
+-- TODO: The UTF-8 variant of this rule is unsound because GHC actually
+-- used Modified UTF-8.
+{-# RULES
+"Builder stringUtf8/cstring" forall s a b c d e.
+  goString (unpackCString# s) a b c d e = goCString s a b c d e
+"Builder stringUtf8/cstring-utf8" forall s a b c d e.
+  goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e
+#-}
+
+goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s
+  -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #)
+goCString addr buf0 off0 len0 cs0 s0 = case Exts.indexWord8OffAddr# addr 0# of
+  0## -> (# s0, buf0, off0, len0, cs0 #)
+  w -> case len0 of
+    0# -> case Exts.newByteArray# 4080# s0 of
+      (# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# w s1 of
+        s2 -> goCString
+          (Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# )
+          (Mutable buf0 off0 cs0)
+          s2
+    _ -> case Exts.writeWord8Array# buf0 off0 w s0 of
+      s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1
+
+fromEffect ::
+     Int -- ^ Maximum number of bytes the paste function needs
+  -> (forall s. MutableByteArray s -> Int -> ST s Int)
+     -- ^ Paste function. Takes a byte array and an offset and returns
+     -- the new offset and having pasted into the buffer.
+  -> Builder
+{-# inline fromEffect #-}
+fromEffect (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 ->
+  let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of
+        1# -> (# s0, buf0, off0, len0, cs0 #)
+        _ -> let !(I# lenX) = max 4080 (I# req) in
+          case Exts.newByteArray# lenX s0 of
+            (# sX, bufX #) ->
+              (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #)
+   in case unST (f (MutableByteArray buf1) (I# off1)) s1 of
+        (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #)
+
+unST :: ST s a -> State# s -> (# State# s, a #)
+unST (ST f) = f
+
+shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
+shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
+  primitive_ (Exts.shrinkMutableByteArray# arr sz)