From 9a14600a388c00d07b00b69126cd3e7b5ee9b1dd Mon Sep 17 00:00:00 2001
From: Alice McKean <mckean.kylej@gmail.com>
Date: Sun, 1 Sep 2019 22:43:54 -0700
Subject: [PATCH 1/4] switch unsafe to bounded + char encoding

---
 small-bytearray-builder.cabal                 |   2 +-
 src/Data/ByteArray/Builder/Small.hs           |  57 +++++---
 .../Builder/Small/{Unsafe.hs => Bounded.hs}   | 122 +++++++++++++++++-
 3 files changed, 152 insertions(+), 29 deletions(-)
 rename src/Data/ByteArray/Builder/Small/{Unsafe.hs => Bounded.hs} (79%)

diff --git a/small-bytearray-builder.cabal b/small-bytearray-builder.cabal
index 8fe031a..f5e511f 100644
--- a/small-bytearray-builder.cabal
+++ b/small-bytearray-builder.cabal
@@ -37,7 +37,7 @@ flag checked
 library
   exposed-modules:
     Data.ByteArray.Builder.Small
-    Data.ByteArray.Builder.Small.Unsafe
+    Data.ByteArray.Builder.Small.Bounded
   build-depends:
     , base >=4.12.0.0 && <5
     , byteslice >=0.1 && <0.2
diff --git a/src/Data/ByteArray/Builder/Small.hs b/src/Data/ByteArray/Builder/Small.hs
index f2f0e54..a9cfabe 100644
--- a/src/Data/ByteArray/Builder/Small.hs
+++ b/src/Data/ByteArray/Builder/Small.hs
@@ -7,10 +7,10 @@
 {-# language UnboxedTuples #-}
 
 module Data.ByteArray.Builder.Small
-  ( -- * Unsafe Primitives
+  ( -- * Bounded Primitives
     Builder(..)
   , construct
-  , fromUnsafe
+  , fromBounded
     -- * Evaluation
   , run
   , pasteST
@@ -34,10 +34,13 @@ module Data.ByteArray.Builder.Small
   , word32PaddedUpperHex
   , word16PaddedUpperHex
   , word8PaddedUpperHex
+  , ascii
+  , char
     -- ** Machine-Readable
   , word64BE
   , word32BE
   , word16BE
+  , word8
     -- * Encode Floating-Point Types
     -- ** Human-Readable
   , doubleDec
@@ -62,7 +65,7 @@ import qualified GHC.Exts as Exts
 import qualified Data.Text.Short as TS
 import qualified Data.Primitive as PM
 import qualified Data.Vector as V
-import qualified Data.ByteArray.Builder.Small.Unsafe as Unsafe
+import qualified Data.ByteArray.Builder.Small.Bounded as Bounded
 
 -- | An unmaterialized sequence of bytes that may be pasted
 -- into a mutable byte array.
@@ -183,9 +186,9 @@ construct f = Builder
         Nothing -> (# s1, (-1#) #)
         Just (I# n) -> (# s1, n #)
 
-fromUnsafe :: forall n. KnownNat n => Unsafe.Builder n -> Builder
-{-# inline fromUnsafe #-}
-fromUnsafe (Unsafe.Builder f) = Builder $ \arr off len s0 ->
+fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder
+{-# inline fromBounded #-}
+fromBounded (Bounded.Builder f) = Builder $ \arr off len s0 ->
   case fromIntegral (natVal' (proxy# :: Proxy# n)) of
     I# req -> case len >=# req of
       1# -> f arr off s0
@@ -223,8 +226,8 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
                     then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
                     else do
                       write2 dst doff '\\' 'u'
-                      doff' <- Unsafe.pasteST
-                        (Unsafe.word16PaddedUpperHex (fromIntegral (c2w c)))
+                      doff' <- Bounded.pasteST
+                        (Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
                         dst (doff + 2)
                       go (soff + 1) (slen - 1) doff'
                 else pure doff
@@ -262,61 +265,70 @@ shortTextJsonString a =
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word64Dec :: Word64 -> Builder
-word64Dec w = fromUnsafe (Unsafe.word64Dec w)
+word64Dec w = fromBounded (Bounded.word64Dec w)
 
 -- | Encodes an unsigned 16-bit integer as decimal.
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word32Dec :: Word32 -> Builder
-word32Dec w = fromUnsafe (Unsafe.word32Dec w)
+word32Dec w = fromBounded (Bounded.word32Dec w)
 
 -- | Encodes an unsigned 16-bit integer as decimal.
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word16Dec :: Word16 -> Builder
-word16Dec w = fromUnsafe (Unsafe.word16Dec w)
+word16Dec w = fromBounded (Bounded.word16Dec w)
 
 -- | Encode a double-floating-point number, using decimal notation or
 -- scientific notation depending on the magnitude. This has undefined
 -- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
 -- crash, but the generated numbers will be nonsense.
 doubleDec :: Double -> Builder
-doubleDec w = fromUnsafe (Unsafe.doubleDec w)
+doubleDec w = fromBounded (Bounded.doubleDec w)
 
 -- | Encodes a signed 64-bit integer as decimal.
 -- This encoding never starts with a zero unless the argument was zero.
 -- Negative numbers are preceded by a minus sign. Positive numbers
 -- are not preceded by anything.
 int64Dec :: Int64 -> Builder
-int64Dec w = fromUnsafe (Unsafe.int64Dec w)
+int64Dec w = fromBounded (Bounded.int64Dec w)
 
 -- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 16 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @00000000000003FE@.
 word64PaddedUpperHex :: Word64 -> Builder
 word64PaddedUpperHex w =
-  fromUnsafe (Unsafe.word64PaddedUpperHex w)
+  fromBounded (Bounded.word64PaddedUpperHex w)
 
 -- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 8 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @000003FE@.
 word32PaddedUpperHex :: Word32 -> Builder
 word32PaddedUpperHex w =
-  fromUnsafe (Unsafe.word32PaddedUpperHex w)
+  fromBounded (Bounded.word32PaddedUpperHex w)
 
 -- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 4 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @03FE@.
 word16PaddedUpperHex :: Word16 -> Builder
 word16PaddedUpperHex w =
-  fromUnsafe (Unsafe.word16PaddedUpperHex w)
+  fromBounded (Bounded.word16PaddedUpperHex w)
 
 -- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 2 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 11 as @0B@.
 word8PaddedUpperHex :: Word8 -> Builder
 word8PaddedUpperHex w =
-  fromUnsafe (Unsafe.word8PaddedUpperHex w)
+  fromBounded (Bounded.word8PaddedUpperHex w)
+
+-- | Encode an ASCII char.
+-- Precondition: Input must be an ASCII character. This is not checked.
+ascii :: Char -> Builder
+ascii c = fromBounded (Bounded.char c)
+
+-- | Encode an UTF8 char. This only uses as much space as is required.
+char :: Char -> Builder
+char c = fromBounded (Bounded.char c)
 
 unST :: ST s a -> State# s -> (# State# s, a #)
 unST (ST f) = f
@@ -328,17 +340,20 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
 -- | Requires exactly 8 bytes. Dump the octets of a 64-bit
 -- word in a big-endian fashion.
 word64BE :: Word64 -> Builder
-word64BE w = fromUnsafe (Unsafe.word64BE w)
+word64BE w = fromBounded (Bounded.word64BE w)
 
 -- | Requires exactly 4 bytes. Dump the octets of a 32-bit
 -- word in a big-endian fashion.
 word32BE :: Word32 -> Builder
-word32BE w = fromUnsafe (Unsafe.word32BE w)
+word32BE w = fromBounded (Bounded.word32BE w)
 
 -- | Requires exactly 2 bytes. Dump the octets of a 16-bit
 -- word in a big-endian fashion.
 word16BE :: Word16 -> Builder
-word16BE w = fromUnsafe (Unsafe.word16BE w)
+word16BE w = fromBounded (Bounded.word16BE w)
+
+word8 :: Word8 -> Builder
+word8 w = fromBounded (Bounded.word8 w)
 
 -- ShortText is already UTF-8 encoded. This is a no-op.
 shortTextToByteArray :: ShortText -> ByteArray
@@ -349,4 +364,4 @@ indexChar8Array :: ByteArray -> Int -> Char
 indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i)
 
 c2w :: Char -> Word8
-c2w = fromIntegral . ord 
+c2w = fromIntegral . ord
diff --git a/src/Data/ByteArray/Builder/Small/Unsafe.hs b/src/Data/ByteArray/Builder/Small/Bounded.hs
similarity index 79%
rename from src/Data/ByteArray/Builder/Small/Unsafe.hs
rename to src/Data/ByteArray/Builder/Small/Bounded.hs
index 1ae6695..429e0c5 100644
--- a/src/Data/ByteArray/Builder/Small/Unsafe.hs
+++ b/src/Data/ByteArray/Builder/Small/Bounded.hs
@@ -1,8 +1,9 @@
-{-# language GADTSyntax #-}
+{-# language GADTs #-}
 {-# language KindSignatures #-}
 {-# language ScopedTypeVariables #-}
 {-# language BangPatterns #-}
 {-# language MagicHash #-}
+{-# language BinaryLiterals #-}
 {-# language UnboxedTuples #-}
 {-# language RankNTypes #-}
 {-# language LambdaCase #-}
@@ -10,9 +11,8 @@
 {-# language DataKinds #-}
 {-# language TypeApplications #-}
 
--- | The functions in this module do not check to
--- see if there is enough space in the buffer.
-module Data.ByteArray.Builder.Small.Unsafe
+-- | The functions in this module are explict in the amount of bytes they require.
+module Data.ByteArray.Builder.Small.Bounded
   ( -- * Builder
     Builder(..)
   , construct
@@ -23,6 +23,11 @@ module Data.ByteArray.Builder.Small.Unsafe
   , pasteIO
     -- * Combine
   , append
+    -- * Bounds Manipulation
+  , (<=)
+  , lessThanEqual
+  , isLessThanEqual
+  , raise
     -- * Encode Integral Types
     -- ** Human-Readable
   , word64Dec
@@ -33,6 +38,8 @@ module Data.ByteArray.Builder.Small.Unsafe
   , word32PaddedUpperHex
   , word16PaddedUpperHex
   , word8PaddedUpperHex
+  , ascii
+  , char
     -- ** Machine-Readable
   , word64BE
   , word32BE
@@ -53,8 +60,9 @@ import GHC.Word
 import GHC.Int
 import Data.Kind
 import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
+import qualified GHC.TypeLits as GHC
 import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
-import Control.Monad (when)
+import qualified Control.Category as Cat
 
 import qualified Data.Primitive as PM
 
@@ -65,13 +73,16 @@ newtype Builder :: Nat -> Type where
         (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
      -> Builder n
 
+knownNat :: KnownNat n => Proxy# n -> Int
+knownNat p = fromIntegral (natVal' p)
+
 -- | Execute the builder. This function is safe.
 run :: forall n. KnownNat n
   => Builder n -- ^ Builder
   -> ByteArray
 {-# inline run #-}
 run b = runST $ do
-  arr <- newByteArray (fromIntegral (natVal' (proxy# :: Proxy# n)))
+  arr <- newByteArray (knownNat (proxy# :: Proxy# n))
   len <- pasteST b arr 0
   shrinkMutableByteArray arr len
   unsafeFreezeByteArray arr
@@ -96,7 +107,7 @@ pasteGrowST :: forall n s. KnownNat n
 {-# inline pasteGrowST #-}
 pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
   sz0 <- PM.getSizeofMutableByteArray arr0
-  let req = fromIntegral (natVal' (proxy# :: Proxy# n))
+  let req = knownNat (proxy# :: Proxy# n)
   let sz1 = off0 + req
   if sz1 <= sz0
     then do
@@ -131,6 +142,33 @@ append (Builder f) (Builder g) =
   Builder $ \arr off0 s0 -> case f arr off0 s0 of
     (# s1, r #) -> g arr r s1
 
+-- | A proof that n is less than or equal to m
+newtype (n :: Nat) <= (m :: Nat) = LessThanEqual Int -- m - n
+
+instance Cat.Category (<=) where
+  id = LessThanEqual 0
+  -- b <= c (c - b) -> a <= b (b - a) -> a <= c (c - a)
+  LessThanEqual cb . LessThanEqual ba = LessThanEqual (cb + ba)
+
+-- | Dynamically check than 'n' is less than or equal to 'm'
+isLessThanEqual :: (KnownNat n, KnownNat m) => Proxy# n -> Proxy# m -> Maybe (n <= m)
+isLessThanEqual n m = if 0 <= diff then Just (LessThanEqual diff) else Nothing
+  where diff = knownNat m - knownNat n
+
+-- | Statically check than 'n' is less than or equal to 'm'. 'n' and 'm' must be known at compile time.
+lessThanEqual :: forall n m. (KnownNat n, KnownNat m, n GHC.<= m) => n <= m
+lessThanEqual = LessThanEqual (knownNat (proxy# :: Proxy# m) - knownNat (proxy# :: Proxy# n))
+
+-- | Weaken the bound on the maximum number of bytes required.
+-- >>> :{
+-- buildNumber :: Either Double Word64 -> Builder 32
+-- buildNumber = \case
+--   Left d  -> doubleDec d
+--   Right w -> raise lessThanEqual (word64Dec w)
+-- :}
+raise :: forall m n. n <= m -> Builder n -> Builder m
+raise !_ (Builder f) = Builder f
+
 -- | Encode a double-floating-point number, using decimal notation or
 -- scientific notation depending on the magnitude. This has undefined
 -- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
@@ -302,6 +340,76 @@ word8PaddedUpperHex# w# = construct $ \arr off -> do
   where
   w = W# w#
 
+-- | Encode an ASCII char.
+-- Precondition: Input must be an ASCII character. This is not checked.
+ascii :: Char -> Builder 1
+ascii c = word8 (fromIntegral @Int @Word8 (ord c))
+
+-- | Encode an UTF8 char. This only uses as much space as is required.
+char :: Char -> Builder 4
+char c
+  | codepoint < 0x80 = construct $ \arr off -> do
+      writeByteArray arr off (unsafeWordToWord8 codepoint)
+      pure (off + 1)
+  | codepoint < 0x800 = construct $ \arr off -> do
+      writeByteArray arr off       (unsafeWordToWord8 (byteTwoOne codepoint))
+      writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
+      return (off + 2)
+  | codepoint >= 0xD800 && codepoint < 0xE000 = construct $ \arr off -> do
+      -- Codepoint U+FFFD
+      writeByteArray arr off       (0xEF :: Word8)
+      writeByteArray arr (off + 1) (0xBF :: Word8)
+      writeByteArray arr (off + 2) (0xBD :: Word8)
+      return (off + 3)
+  | codepoint < 0x10000 = construct $ \arr off -> do
+      writeByteArray arr off       (unsafeWordToWord8 (byteThreeOne codepoint))
+      writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
+      writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
+      return (off + 3)
+  | otherwise = construct $ \arr off -> do
+      writeByteArray arr off       (unsafeWordToWord8 (byteFourOne codepoint))
+      writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
+      writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
+      writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint))
+      return (off + 4)
+
+  where
+    codepoint :: Word
+    codepoint = fromIntegral (ord c)
+
+    unsafeWordToWord8 :: Word -> Word8
+    unsafeWordToWord8 (W# w) = W8# w
+
+    -- precondition: codepoint is less than 0x800
+    byteTwoOne :: Word -> Word
+    byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000
+
+    byteTwoTwo :: Word -> Word
+    byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000
+
+    -- precondition: codepoint is less than 0x1000
+    byteThreeOne :: Word -> Word
+    byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000
+
+    byteThreeTwo :: Word -> Word
+    byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
+
+    byteThreeThree :: Word -> Word
+    byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000
+
+    -- precondition: codepoint is less than 0x110000
+    byteFourOne :: Word -> Word
+    byteFourOne w = unsafeShiftR w 18 .|. 0b11110000
+
+    byteFourTwo :: Word -> Word
+    byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000
+
+    byteFourThree :: Word -> Word
+    byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000
+
+    byteFourFour :: Word -> Word
+    byteFourFour w = (0b00111111 .&. w) .|. 0b10000000
+
 -- | Requires exactly 8 bytes. Dump the octets of a 64-bit
 -- word in a big-endian fashion.
 word64BE :: Word64 -> Builder 8

From 2eb37dfb7ca72b08fb3da15d4bdb7b77b428c18f Mon Sep 17 00:00:00 2001
From: Alice McKean <mckean.kylej@gmail.com>
Date: Sun, 1 Sep 2019 22:56:06 -0700
Subject: [PATCH 2/4] add monoidal unit for Bounded Builders

---
 src/Data/ByteArray/Builder/Small/Bounded.hs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Data/ByteArray/Builder/Small/Bounded.hs b/src/Data/ByteArray/Builder/Small/Bounded.hs
index 429e0c5..892b36d 100644
--- a/src/Data/ByteArray/Builder/Small/Bounded.hs
+++ b/src/Data/ByteArray/Builder/Small/Bounded.hs
@@ -22,6 +22,7 @@ module Data.ByteArray.Builder.Small.Bounded
   , pasteGrowST
   , pasteIO
     -- * Combine
+  , empty
   , append
     -- * Bounds Manipulation
   , (<=)
@@ -136,6 +137,10 @@ construct f = Builder
 
 infixr 9 `append`
 
+-- | The monoidal unit of `append`
+empty :: Builder 0
+empty = Builder $ \_ off0 s0 -> (# s0, off0 #)
+
 -- | Concatenate two builders.
 append :: Builder n -> Builder m -> Builder (n + m)
 append (Builder f) (Builder g) =

From 2d1ea682617ad952f184c1f12baea4db564e1702 Mon Sep 17 00:00:00 2001
From: Andrew Martin <andrew.thaddeus@gmail.com>
Date: Tue, 3 Sep 2019 14:43:58 -0400
Subject: [PATCH 3/4] Remove the word "Small" from module names. Move unsafe
 functions into modules suffixed with "Unsafe". Write a better IsString
 instance. Depend on natural-arithmetic.

---
 bench/Main.hs                                 |   2 +-
 cabal.project                                 |   5 +
 common/HexWord64.hs                           |   2 +-
 small-bytearray-builder.cabal                 |   9 +-
 .../{Builder/Small.hs => Builder.hs}          |  36 ++---
 .../ByteArray/Builder/{Small => }/Bounded.hs  | 149 ++++++------------
 src/Data/ByteArray/Builder/Bounded/Unsafe.hs  |  59 +++++++
 src/Data/ByteArray/Builder/Unsafe.hs          |  94 +++++++++++
 test/Main.hs                                  |  28 ++--
 9 files changed, 247 insertions(+), 137 deletions(-)
 create mode 100644 cabal.project
 rename src/Data/ByteArray/{Builder/Small.hs => Builder.hs} (93%)
 rename src/Data/ByteArray/Builder/{Small => }/Bounded.hs (82%)
 create mode 100644 src/Data/ByteArray/Builder/Bounded/Unsafe.hs
 create mode 100644 src/Data/ByteArray/Builder/Unsafe.hs

diff --git a/bench/Main.hs b/bench/Main.hs
index a77a65e..28ef227 100644
--- a/bench/Main.hs
+++ b/bench/Main.hs
@@ -2,7 +2,7 @@ import Gauge (bgroup,bench,whnf)
 import Gauge.Main (defaultMain)
 import Data.Word (Word64)
 import Data.Primitive (ByteArray)
-import qualified Data.ByteArray.Builder.Small.Unsafe as U
+import qualified Data.ByteArray.Builder.Bounded as U
 
 import qualified HexWord64
 
diff --git a/cabal.project b/cabal.project
new file mode 100644
index 0000000..079db62
--- /dev/null
+++ b/cabal.project
@@ -0,0 +1,5 @@
+packages: .
+source-repository-package
+  type: git
+  location: https://github.com/andrewthad/natural-arithmetic
+  tag: 68868c96b58ddaf71bb865b247d2c14c3668f4c2
diff --git a/common/HexWord64.hs b/common/HexWord64.hs
index 68ea963..65c5dfa 100644
--- a/common/HexWord64.hs
+++ b/common/HexWord64.hs
@@ -17,7 +17,7 @@ module HexWord64
 
 import GHC.ST (ST(ST))
 import Data.Bits
-import Data.ByteArray.Builder.Small.Unsafe (Builder,construct)
+import Data.ByteArray.Builder.Bounded.Unsafe (Builder,construct)
 import Data.Primitive
 import Data.Word
 import GHC.Exts
diff --git a/small-bytearray-builder.cabal b/small-bytearray-builder.cabal
index f5e511f..9d613bd 100644
--- a/small-bytearray-builder.cabal
+++ b/small-bytearray-builder.cabal
@@ -36,8 +36,10 @@ flag checked
 
 library
   exposed-modules:
-    Data.ByteArray.Builder.Small
-    Data.ByteArray.Builder.Small.Bounded
+    Data.ByteArray.Builder
+    Data.ByteArray.Builder.Unsafe
+    Data.ByteArray.Builder.Bounded
+    Data.ByteArray.Builder.Bounded.Unsafe
   build-depends:
     , base >=4.12.0.0 && <5
     , byteslice >=0.1 && <0.2
@@ -46,6 +48,7 @@ library
     , vector >=0.12.0.3 && <0.13
     , bytestring >=0.10.8.2 && <0.11
     , text-short >=0.1.3 && <0.2
+    , natural-arithmetic >=0.1 && <0.2
   if flag(checked)
     build-depends: primitive-checked >= 0.7 && <0.8
   else
@@ -59,6 +62,7 @@ test-suite test
   type: exitcode-stdio-1.0
   hs-source-dirs: test, common
   main-is: Main.hs
+  ghc-options: -O2 -Wall
   other-modules:
     HexWord64
   build-depends:
@@ -70,6 +74,7 @@ test-suite test
     , tasty-quickcheck >=0.10.1 && <0.11
     , tasty-hunit >=0.10.0.2 && <0.11
     , tasty >=1.2.3 && <1.3
+    , text >=1.2 && <1.3
     , primitive
     , vector
 
diff --git a/src/Data/ByteArray/Builder/Small.hs b/src/Data/ByteArray/Builder.hs
similarity index 93%
rename from src/Data/ByteArray/Builder/Small.hs
rename to src/Data/ByteArray/Builder.hs
index a9cfabe..ce49b26 100644
--- a/src/Data/ByteArray/Builder/Small.hs
+++ b/src/Data/ByteArray/Builder.hs
@@ -6,7 +6,7 @@
 {-# language ScopedTypeVariables #-}
 {-# language UnboxedTuples #-}
 
-module Data.ByteArray.Builder.Small
+module Data.ByteArray.Builder
   ( -- * Bounded Primitives
     Builder(..)
   , construct
@@ -24,6 +24,8 @@ module Data.ByteArray.Builder.Small
   , bytearray
   , shortTextUtf8
   , shortTextJsonString
+  , cstring
+  , stringUtf8
     -- * Encode Integral Types
     -- ** Human-Readable
   , word64Dec
@@ -60,32 +62,15 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
 import Data.ByteString.Short.Internal (ShortByteString(SBS))
 import Data.Text.Short (ShortText)
 import Data.Char (ord)
+import Data.ByteArray.Builder.Unsafe (Builder(Builder))
+import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
 
 import qualified GHC.Exts as Exts
 import qualified Data.Text.Short as TS
 import qualified Data.Primitive as PM
 import qualified Data.Vector as V
-import qualified Data.ByteArray.Builder.Small.Bounded as Bounded
-
--- | An unmaterialized sequence of bytes that may be pasted
--- into a mutable byte array.
-newtype Builder = Builder
-  -- This functions takes an offset and a number of remaining bytes
-  -- and returns the new offset.
-  (forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
-
-instance Semigroup Builder where
-  {-# inline (<>) #-}
-  Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
-    (# s1, r #) -> case r /=# (-1#) of
-      1# -> g arr r (len0 +# (off0 -# r)) s1
-      _ -> (# s1, (-1#) #)
-
-instance Monoid Builder where
-  mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
-
-instance IsString Builder where
-  fromString = shortTextUtf8 . TS.fromString
+import qualified Data.ByteArray.Builder.Bounded as Bounded
+import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
 
 -- | Run a builder. An accurate size hint is important for good performance.
 -- The size hint should be slightly larger than the actual size.
@@ -101,7 +86,7 @@ run hint b = runByteArrayST $ do
           Just len -> do
             shrinkMutableByteArray arr len
             unsafeFreezeByteArray arr
-  go hint
+  go (max hint 1)
 
 -- | Variant of 'pasteArrayST' that runs in 'IO'.
 pasteArrayIO ::
@@ -186,9 +171,10 @@ construct f = Builder
         Nothing -> (# s1, (-1#) #)
         Just (I# n) -> (# s1, n #)
 
+-- | Convert a bounded builder to an unbounded one.
 fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder
 {-# inline fromBounded #-}
-fromBounded (Bounded.Builder f) = Builder $ \arr off len s0 ->
+fromBounded (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
   case fromIntegral (natVal' (proxy# :: Proxy# n)) of
     I# req -> case len >=# req of
       1# -> f arr off s0
@@ -226,7 +212,7 @@ slicedUtf8TextJson !src# !soff0# !slen0# = construct $ \(MutableBytes dst doff0
                     then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1)
                     else do
                       write2 dst doff '\\' 'u'
-                      doff' <- Bounded.pasteST
+                      doff' <- UnsafeBounded.pasteST
                         (Bounded.word16PaddedUpperHex (fromIntegral (c2w c)))
                         dst (doff + 2)
                       go (soff + 1) (slen - 1) doff'
diff --git a/src/Data/ByteArray/Builder/Small/Bounded.hs b/src/Data/ByteArray/Builder/Bounded.hs
similarity index 82%
rename from src/Data/ByteArray/Builder/Small/Bounded.hs
rename to src/Data/ByteArray/Builder/Bounded.hs
index 892b36d..5db8a12 100644
--- a/src/Data/ByteArray/Builder/Small/Bounded.hs
+++ b/src/Data/ByteArray/Builder/Bounded.hs
@@ -1,4 +1,3 @@
-{-# language GADTs #-}
 {-# language KindSignatures #-}
 {-# language ScopedTypeVariables #-}
 {-# language BangPatterns #-}
@@ -12,23 +11,18 @@
 {-# language TypeApplications #-}
 
 -- | The functions in this module are explict in the amount of bytes they require.
-module Data.ByteArray.Builder.Small.Bounded
+module Data.ByteArray.Builder.Bounded
   ( -- * Builder
-    Builder(..)
-  , construct
+    Builder
     -- * Execute
   , run
-  , pasteST
   , pasteGrowST
-  , pasteIO
     -- * Combine
   , empty
   , append
     -- * Bounds Manipulation
-  , (<=)
-  , lessThanEqual
-  , isLessThanEqual
-  , raise
+  , weaken
+  , substitute
     -- * Encode Integral Types
     -- ** Human-Readable
   , word64Dec
@@ -56,46 +50,33 @@ import Data.Bits
 import Data.Char (ord)
 import Data.Primitive
 import GHC.Exts
-import GHC.ST
-import GHC.Word
-import GHC.Int
-import Data.Kind
-import GHC.TypeLits (KnownNat,Nat,type (+),natVal')
-import qualified GHC.TypeLits as GHC
+import GHC.ST (ST(ST))
+import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
+import GHC.Int (Int64(I64#))
+import GHC.TypeLits (KnownNat,type (+),natVal')
 import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
-import qualified Control.Category as Cat
+import Arithmetic.Types (type (<=), type (:=:))
+import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
 
+import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe
 import qualified Data.Primitive as PM
 
--- | A builder parameterized by the maximum number of bytes it uses
--- when executed.
-newtype Builder :: Nat -> Type where
-   Builder ::
-        (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
-     -> Builder n
-
+-- Used internally.
 knownNat :: KnownNat n => Proxy# n -> Int
+{-# inline knownNat #-}
 knownNat p = fromIntegral (natVal' p)
 
--- | Execute the builder. This function is safe.
+-- | Execute the bounded builder.
 run :: forall n. KnownNat n
   => Builder n -- ^ Builder
   -> ByteArray
 {-# inline run #-}
 run b = runST $ do
   arr <- newByteArray (knownNat (proxy# :: Proxy# n))
-  len <- pasteST b arr 0
+  len <- Unsafe.pasteST b arr 0
   shrinkMutableByteArray arr len
   unsafeFreezeByteArray arr
 
--- | This function does not enforce the known upper bound on the
--- size. It is up to the user to do this.
-pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
-{-# inline pasteST #-}
-pasteST (Builder f) (MutableByteArray arr) (I# off) =
-  ST $ \s0 -> case f arr off s0 of
-    (# s1, r #) -> (# s1, (I# r) #)
-
 -- | Paste the builder into the byte array starting at offset zero.
 -- This reallocates the byte array if it cannot accomodate the builder,
 -- growing it by the minimum amount necessary.
@@ -112,67 +93,40 @@ pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
   let sz1 = off0 + req
   if sz1 <= sz0
     then do
-      off1 <- pasteST b arr0 off0
+      off1 <- Unsafe.pasteST b arr0 off0
       pure (MutableByteArrayOffset arr0 off1)
     else do
       arr1 <- PM.resizeMutableByteArray arr0 sz1
-      off1 <- pasteST b arr1 off0
+      off1 <- Unsafe.pasteST b arr1 off0
       pure (MutableByteArrayOffset arr1 off1)
 
--- | This function does not enforce the known upper bound on the
--- size. It is up to the user to do this.
-pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
-{-# inline pasteIO #-}
-pasteIO b m off = stToIO (pasteST b m off)
-
--- | Constructor for 'Builder' that works on a function with lifted
--- arguments instead of unlifted ones. This is just as unsafe as the
--- actual constructor.
-construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
-{-# inline construct #-}
-construct f = Builder
-  $ \arr off s0 ->
-    case unST (f (MutableByteArray arr) (I# off)) s0 of
-      (# s1, (I# n) #) -> (# s1, n #)
-
-infixr 9 `append`
-
 -- | The monoidal unit of `append`
 empty :: Builder 0
 empty = Builder $ \_ off0 s0 -> (# s0, off0 #)
 
+infixr 9 `append`
+
 -- | Concatenate two builders.
-append :: Builder n -> Builder m -> Builder (n + m)
+append :: Builder m -> Builder n -> Builder (m + n)
 append (Builder f) (Builder g) =
   Builder $ \arr off0 s0 -> case f arr off0 s0 of
     (# s1, r #) -> g arr r s1
 
--- | A proof that n is less than or equal to m
-newtype (n :: Nat) <= (m :: Nat) = LessThanEqual Int -- m - n
+-- | Weaken the bound on the maximum number of bytes required. For example,
+-- to use two builders with unequal bounds in a disjunctive setting:
+--
+-- > import qualified Arithmetic.Lte as Lte
+-- >
+-- > buildNumber :: Either Double Word64 -> Builder 32
+-- > buildNumber = \case
+-- >   Left d  -> doubleDec d
+-- >   Right w -> weaken (Lte.constant @19 @32) (word64Dec w)
+weaken :: forall m n. (m <= n) -> Builder m -> Builder n
+weaken !_ (Builder f) = Builder f
 
-instance Cat.Category (<=) where
-  id = LessThanEqual 0
-  -- b <= c (c - b) -> a <= b (b - a) -> a <= c (c - a)
-  LessThanEqual cb . LessThanEqual ba = LessThanEqual (cb + ba)
-
--- | Dynamically check than 'n' is less than or equal to 'm'
-isLessThanEqual :: (KnownNat n, KnownNat m) => Proxy# n -> Proxy# m -> Maybe (n <= m)
-isLessThanEqual n m = if 0 <= diff then Just (LessThanEqual diff) else Nothing
-  where diff = knownNat m - knownNat n
-
--- | Statically check than 'n' is less than or equal to 'm'. 'n' and 'm' must be known at compile time.
-lessThanEqual :: forall n m. (KnownNat n, KnownNat m, n GHC.<= m) => n <= m
-lessThanEqual = LessThanEqual (knownNat (proxy# :: Proxy# m) - knownNat (proxy# :: Proxy# n))
-
--- | Weaken the bound on the maximum number of bytes required.
--- >>> :{
--- buildNumber :: Either Double Word64 -> Builder 32
--- buildNumber = \case
---   Left d  -> doubleDec d
---   Right w -> raise lessThanEqual (word64Dec w)
--- :}
-raise :: forall m n. n <= m -> Builder n -> Builder m
-raise !_ (Builder f) = Builder f
+-- | Replace the upper bound on size with an equal number.
+substitute :: forall m n. (m :=: n) -> Builder m -> Builder n
+substitute !_ (Builder f) = Builder f
 
 -- | Encode a double-floating-point number, using decimal notation or
 -- scientific notation depending on the magnitude. This has undefined
@@ -207,7 +161,7 @@ int64Dec (I64# w) = int64Dec# w
 -- the word. This is only used internally.
 wordCommonDec# :: Word# -> Builder n
 {-# noinline wordCommonDec# #-}
-wordCommonDec# w# = construct $ \arr off0 -> if w /= 0
+wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0
   then internalWordLoop arr off0 (W# w#)
   else do
     writeByteArray arr off0 (c2w '0')
@@ -230,7 +184,7 @@ internalWordLoop arr off0 x0 = go off0 x0 where
 -- | Requires up to 19 bytes.
 int64Dec# :: Int# -> Builder 20
 {-# noinline int64Dec# #-}
-int64Dec# w# = construct $ \arr off0 -> case compare w 0 of
+int64Dec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of
   GT -> internalWordLoop arr off0 (fromIntegral w)
   EQ -> do
     writeByteArray arr off0 (c2w '0')
@@ -286,7 +240,7 @@ word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# w
 -- might not be. Benchmark this.
 word64PaddedUpperHex# :: Word# -> Builder 16
 {-# noinline word64PaddedUpperHex# #-}
-word64PaddedUpperHex# w# = construct $ \arr off -> do
+word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
   writeByteArray arr off (toHexUpper (unsafeShiftR w 60))
   writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56))
   writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52))
@@ -309,7 +263,7 @@ word64PaddedUpperHex# w# = construct $ \arr off -> do
 
 word32PaddedUpperHex# :: Word# -> Builder 8
 {-# noinline word32PaddedUpperHex# #-}
-word32PaddedUpperHex# w# = construct $ \arr off -> do
+word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
   writeByteArray arr off (toHexUpper (unsafeShiftR w 28))
   writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24))
   writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20))
@@ -326,7 +280,7 @@ word32PaddedUpperHex# w# = construct $ \arr off -> do
 -- GHC make the decision. Open an issue on github if this is
 -- a problem.
 word16PaddedUpperHex# :: Word# -> Builder 4
-word16PaddedUpperHex# w# = construct $ \arr off -> do
+word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
   writeByteArray arr off (toHexUpper (unsafeShiftR w 12))
   writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8))
   writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4))
@@ -338,7 +292,7 @@ word16PaddedUpperHex# w# = construct $ \arr off -> do
 -- Definitely want this to inline. It's maybe a dozen instructions total.
 word8PaddedUpperHex# :: Word# -> Builder 2
 {-# inline word8PaddedUpperHex #-}
-word8PaddedUpperHex# w# = construct $ \arr off -> do
+word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do
   writeByteArray arr off (toHexUpper (unsafeShiftR w 4))
   writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0))
   pure (off + 2)
@@ -350,28 +304,28 @@ word8PaddedUpperHex# w# = construct $ \arr off -> do
 ascii :: Char -> Builder 1
 ascii c = word8 (fromIntegral @Int @Word8 (ord c))
 
--- | Encode an UTF8 char. This only uses as much space as is required.
+-- | Encode a character as UTF-8. This only uses as much space as is required.
 char :: Char -> Builder 4
 char c
-  | codepoint < 0x80 = construct $ \arr off -> do
+  | codepoint < 0x80 = Unsafe.construct $ \arr off -> do
       writeByteArray arr off (unsafeWordToWord8 codepoint)
       pure (off + 1)
-  | codepoint < 0x800 = construct $ \arr off -> do
+  | codepoint < 0x800 = Unsafe.construct $ \arr off -> do
       writeByteArray arr off       (unsafeWordToWord8 (byteTwoOne codepoint))
       writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint))
       return (off + 2)
-  | codepoint >= 0xD800 && codepoint < 0xE000 = construct $ \arr off -> do
+  | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do
       -- Codepoint U+FFFD
       writeByteArray arr off       (0xEF :: Word8)
       writeByteArray arr (off + 1) (0xBF :: Word8)
       writeByteArray arr (off + 2) (0xBD :: Word8)
       return (off + 3)
-  | codepoint < 0x10000 = construct $ \arr off -> do
+  | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do
       writeByteArray arr off       (unsafeWordToWord8 (byteThreeOne codepoint))
       writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint))
       writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint))
       return (off + 3)
-  | otherwise = construct $ \arr off -> do
+  | otherwise = Unsafe.construct $ \arr off -> do
       writeByteArray arr off       (unsafeWordToWord8 (byteFourOne codepoint))
       writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint))
       writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint))
@@ -418,7 +372,7 @@ char c
 -- | Requires exactly 8 bytes. Dump the octets of a 64-bit
 -- word in a big-endian fashion.
 word64BE :: Word64 -> Builder 8
-word64BE w = construct $ \arr off -> do
+word64BE w = Unsafe.construct $ \arr off -> do
   writeByteArray arr (off    ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56))
   writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48))
   writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40))
@@ -432,7 +386,7 @@ word64BE w = construct $ \arr off -> do
 -- | Requires exactly 4 bytes. Dump the octets of a 32-bit
 -- word in a big-endian fashion.
 word32BE :: Word32 -> Builder 4
-word32BE w = construct $ \arr off -> do
+word32BE w = Unsafe.construct $ \arr off -> do
   writeByteArray arr (off    ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24))
   writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16))
   writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8))
@@ -442,13 +396,13 @@ word32BE w = construct $ \arr off -> do
 -- | Requires exactly 2 bytes. Dump the octets of a 16-bit
 -- word in a big-endian fashion.
 word16BE :: Word16 -> Builder 2
-word16BE w = construct $ \arr off -> do
+word16BE w = Unsafe.construct $ \arr off -> do
   writeByteArray arr (off    ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8))
   writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w)
   pure (off + 2)
 
 word8 :: Word8 -> Builder 1
-word8 w = construct $ \arr off -> do
+word8 w = Unsafe.construct $ \arr off -> do
   writeByteArray arr off w
   pure (off + 1)
 
@@ -469,9 +423,6 @@ reverseBytes arr begin end = go begin end where
 c2w :: Char -> Word8
 c2w = fromIntegral . ord
 
-unST :: ST s a -> State# s -> (# State# s, a #)
-unST (ST f) = f
-
 shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s ()
 shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
   primitive_ (shrinkMutableByteArray# arr sz)
diff --git a/src/Data/ByteArray/Builder/Bounded/Unsafe.hs b/src/Data/ByteArray/Builder/Bounded/Unsafe.hs
new file mode 100644
index 0000000..9bad04f
--- /dev/null
+++ b/src/Data/ByteArray/Builder/Bounded/Unsafe.hs
@@ -0,0 +1,59 @@
+{-# language DataKinds #-}
+{-# language GADTSyntax #-}
+{-# language KindSignatures #-}
+{-# language MagicHash #-}
+{-# language RankNTypes #-}
+{-# language ScopedTypeVariables #-}
+{-# language UnboxedTuples #-}
+
+module Data.ByteArray.Builder.Bounded.Unsafe
+  ( -- * Types
+    Builder(..)
+    -- * Construct
+  , construct
+    -- * Run
+  , pasteST
+  , pasteIO
+  ) where
+
+import GHC.TypeLits (Nat)
+import Data.Kind (Type)
+import GHC.IO (stToIO)
+import GHC.ST (ST(ST))
+import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#)
+import Data.Primitive (MutableByteArray(..))
+
+-- | A builder parameterized by the maximum number of bytes it uses
+-- when executed.
+newtype Builder :: Nat -> Type where
+   Builder ::
+        (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #))
+     -> Builder n
+
+-- | Constructor for 'Builder' that works on a function with lifted
+-- arguments instead of unlifted ones. This is just as unsafe as the
+-- actual constructor.
+construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
+{-# inline construct #-}
+construct f = Builder
+  $ \arr off s0 ->
+    case unST (f (MutableByteArray arr) (I# off)) s0 of
+      (# s1, (I# n) #) -> (# s1, n #)
+
+-- | This function does not enforce the known upper bound on the
+-- size. It is up to the user to do this.
+pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int
+{-# inline pasteST #-}
+pasteST (Builder f) (MutableByteArray arr) (I# off) =
+  ST $ \s0 -> case f arr off s0 of
+    (# s1, r #) -> (# s1, (I# r) #)
+
+-- | This function does not enforce the known upper bound on the
+-- size. It is up to the user to do this.
+pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int
+{-# inline pasteIO #-}
+pasteIO b m off = stToIO (pasteST b m off)
+
+unST :: ST s a -> State# s -> (# State# s, a #)
+unST (ST f) = f
+
diff --git a/src/Data/ByteArray/Builder/Unsafe.hs b/src/Data/ByteArray/Builder/Unsafe.hs
new file mode 100644
index 0000000..18c2eba
--- /dev/null
+++ b/src/Data/ByteArray/Builder/Unsafe.hs
@@ -0,0 +1,94 @@
+{-# language BangPatterns #-}
+{-# language DuplicateRecordFields #-}
+{-# language LambdaCase #-}
+{-# language MagicHash #-}
+{-# language RankNTypes #-}
+{-# language ScopedTypeVariables #-}
+{-# language UnboxedTuples #-}
+
+module Data.ByteArray.Builder.Unsafe
+  ( -- * Types
+    Builder(..)
+    -- * Safe Functions
+    -- | These functions are actually completely safe, but they are defined
+    -- here because they are used by typeclass instances. Import them from
+    -- @Data.ByteArray.Builder@ instead.
+  , stringUtf8
+  , cstring
+  ) where
+
+import Data.Primitive (MutableByteArray(MutableByteArray))
+import Foreign.C.String (CString)
+import GHC.Exts ((-#),(+#),(/=#),(>#))
+import GHC.Exts (Addr#,Int(I#),Ptr(Ptr))
+import GHC.Exts (IsString,Int#,State#,MutableByteArray#)
+import GHC.ST (ST(ST))
+import GHC.Base (unpackCString#,unpackCStringUtf8#)
+
+import qualified GHC.Exts as Exts
+import qualified Data.ByteArray.Builder.Bounded as Bounded
+import qualified Data.ByteArray.Builder.Bounded.Unsafe as UnsafeBounded
+
+-- | An unmaterialized sequence of bytes that may be pasted
+-- into a mutable byte array.
+newtype Builder = Builder
+  -- This functions takes an offset and a number of remaining bytes
+  -- and returns the new offset.
+  (forall s. MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #))
+
+instance IsString Builder where
+  {-# inline fromString #-}
+  fromString = stringUtf8
+
+instance Semigroup Builder where
+  {-# inline (<>) #-}
+  Builder f <> Builder g = Builder $ \arr off0 len0 s0 -> case f arr off0 len0 s0 of
+    (# s1, r #) -> case r /=# (-1#) of
+      1# -> g arr r (len0 +# (off0 -# r)) s1
+      _ -> (# s1, (-1#) #)
+
+instance Monoid Builder where
+  {-# inline mempty #-}
+  mempty = Builder $ \_ off0 _ s0 -> (# s0, off0 #)
+
+-- | Create a builder from a cons-list of 'Char'. These
+-- are be UTF-8 encoded.
+stringUtf8 :: String -> Builder
+{-# inline stringUtf8 #-}
+stringUtf8 cs = Builder (\arr off0 len0 s0 -> goString cs arr off0 len0 s0)
+
+-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any
+-- textual encoding, copying bytes until @NUL@ is reached.
+cstring :: CString -> Builder
+{-# inline cstring #-}
+cstring (Ptr cs) = Builder (\arr off0 len0 s0 -> goCString cs arr off0 len0 s0)
+
+goString :: String -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
+{-# noinline goString #-}
+goString [] _ off0 _ s0 = (# s0, off0 #)
+goString (c : cs) buf off0 len0 s0 = case len0 ># 3# of
+  1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf) (I# off0)) s0 of
+    (# s1, I# off1 #) -> goString cs buf off1 (len0 -# (off1 -# off0)) s1
+  _ -> (# s0, (-1#) #)
+
+-- We have to have a rule for both unpackCString# and unpackCStringUtf8#
+-- since GHC uses a different function based on whether or not non-ASCII
+-- codepoints are used in the string.
+{-# RULES
+"Builder stringUtf8/cstring" forall s a b c d.
+  goString (unpackCString# s) a b c d = goCString s a b c d
+"Builder stringUtf8/cstring-utf8" forall s a b c d.
+  goString (unpackCStringUtf8# s) a b c d = goCString s a b c d
+#-}
+
+goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
+goCString addr buf off0 len0 s0 = case Exts.indexWord8OffAddr# addr 0# of
+  0## -> (# s0, off0 #)
+  w -> case len0 of
+    0# -> (# s0, (-1#) #)
+    _ -> case Exts.writeWord8Array# buf off0 w s0 of
+      s1 -> goCString (Exts.plusAddr# addr 1# ) buf (off0 +# 1# ) (len0 -# 1# ) s1
+
+unST :: ST s a -> State# s -> (# State# s, a #)
+unST (ST f) = f
+
diff --git a/test/Main.hs b/test/Main.hs
index 649711e..37b2cb0 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -5,7 +5,7 @@
 
 import Control.Monad.ST (runST)
 import Data.Bytes.Types (MutableBytes(..))
-import Data.ByteArray.Builder.Small
+import Data.ByteArray.Builder
 import Data.Word
 import Data.Char (ord)
 import Data.Primitive (ByteArray)
@@ -14,15 +14,19 @@ import Test.Tasty (defaultMain,testGroup,TestTree)
 import Test.QuickCheck ((===))
 import Text.Printf (printf)
 import Test.Tasty.HUnit ((@=?))
+
+import qualified Data.ByteString as ByteString
 import qualified Data.ByteString.Builder as BB
-import qualified Data.Primitive as PM
-import qualified Data.List as L
-import qualified Data.Vector as V
-import qualified Test.Tasty.QuickCheck as TQC
-import qualified Test.QuickCheck as QC
-import qualified GHC.Exts as Exts
 import qualified Data.ByteString.Lazy.Char8 as LB
+import qualified Data.List as L
+import qualified Data.Primitive as PM
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.Vector as V
+import qualified GHC.Exts as Exts
+import qualified Test.QuickCheck as QC
 import qualified Test.Tasty.HUnit as THU
+import qualified Test.Tasty.QuickCheck as TQC
 
 import qualified HexWord64
 
@@ -54,6 +58,9 @@ tests = testGroup "Tests"
         (runArray word64Dec (V.fromList xs))
         ===
         pack (foldMap show xs)
+    , THU.testCase "stringUtf8" $
+        packUtf8 "¿Cómo estás? I am doing well." @=?
+          run 1 (stringUtf8 "¿Cómo estás? I am doing well.")
     , THU.testCase "doubleDec-A" $
         pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0)
     , THU.testCase "doubleDec-B" $
@@ -88,8 +95,8 @@ tests = testGroup "Tests"
   , testGroup "alternate"
     [ TQC.testProperty "HexWord64" $ \x y ->
         run 1
-          (  fromUnsafe (HexWord64.word64PaddedUpperHex x)
-          <> fromUnsafe (HexWord64.word64PaddedUpperHex y)
+          (  fromBounded (HexWord64.word64PaddedUpperHex x)
+          <> fromBounded (HexWord64.word64PaddedUpperHex y)
           )
         ===
         pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)
@@ -99,6 +106,9 @@ tests = testGroup "Tests"
 pack :: String -> ByteArray
 pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord)
 
+packUtf8 :: String -> ByteArray
+packUtf8 = Exts.fromList . ByteString.unpack . TE.encodeUtf8 . T.pack
+
 -- This is used to test pasteArrayST
 runArray ::
      (a -> Builder) -- ^ Builder

From a5bbf88e71c8506c64a7d135e1b8bd3c7152e9ed Mon Sep 17 00:00:00 2001
From: Andrew Martin <andrew.thaddeus@gmail.com>
Date: Tue, 3 Sep 2019 14:58:03 -0400
Subject: [PATCH 4/4] Avoid KnownNat constraints in favor of using
 Arithmetic.Types.Nat

---
 bench/Main.hs                         | 10 ++--
 small-bytearray-builder.cabal         | 14 +++---
 src/Data/ByteArray/Builder.hs         | 72 +++++++++++++++------------
 src/Data/ByteArray/Builder/Bounded.hs | 57 ++++++++++-----------
 test/Main.hs                          |  7 ++-
 5 files changed, 85 insertions(+), 75 deletions(-)

diff --git a/bench/Main.hs b/bench/Main.hs
index 28ef227..65872c4 100644
--- a/bench/Main.hs
+++ b/bench/Main.hs
@@ -1,7 +1,9 @@
+import Data.Primitive (ByteArray)
+import Data.Word (Word64)
 import Gauge (bgroup,bench,whnf)
 import Gauge.Main (defaultMain)
-import Data.Word (Word64)
-import Data.Primitive (ByteArray)
+
+import qualified Arithmetic.Nat as Nat
 import qualified Data.ByteArray.Builder.Bounded as U
 
 import qualified HexWord64
@@ -33,7 +35,7 @@ data Word64s = Word64s
 
 encodeHexWord64s :: Word64s -> ByteArray
 {-# noinline encodeHexWord64s #-}
-encodeHexWord64s (Word64s a b c d e f g h) = U.run $
+encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $
   U.word64PaddedUpperHex a `U.append`
   U.word64PaddedUpperHex b `U.append`
   U.word64PaddedUpperHex c `U.append`
@@ -45,7 +47,7 @@ encodeHexWord64s (Word64s a b c d e f g h) = U.run $
 
 encodeHexWord64sLoop :: Word64s -> ByteArray
 {-# noinline encodeHexWord64sLoop #-}
-encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run $
+encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $
   HexWord64.word64PaddedUpperHex a `U.append`
   HexWord64.word64PaddedUpperHex b `U.append`
   HexWord64.word64PaddedUpperHex c `U.append`
diff --git a/small-bytearray-builder.cabal b/small-bytearray-builder.cabal
index 9d613bd..6373cf7 100644
--- a/small-bytearray-builder.cabal
+++ b/small-bytearray-builder.cabal
@@ -66,16 +66,17 @@ test-suite test
   other-modules:
     HexWord64
   build-depends:
+    , QuickCheck >=2.13.1 && <2.14
     , base >=4.12.0.0 && <5
     , byteslice
     , bytestring
-    , small-bytearray-builder
-    , QuickCheck >=2.13.1 && <2.14
-    , tasty-quickcheck >=0.10.1 && <0.11
-    , tasty-hunit >=0.10.0.2 && <0.11
-    , tasty >=1.2.3 && <1.3
-    , text >=1.2 && <1.3
+    , natural-arithmetic
     , primitive
+    , small-bytearray-builder
+    , tasty >=1.2.3 && <1.3
+    , tasty-hunit >=0.10.0.2 && <0.11
+    , tasty-quickcheck >=0.10.1 && <0.11
+    , text >=1.2 && <1.3
     , vector
 
 benchmark bench
@@ -83,6 +84,7 @@ benchmark bench
   build-depends:
     , base
     , gauge >= 0.2.4
+    , natural-arithmetic
     , primitive
     , small-bytearray-builder
   ghc-options: -Wall -O2
diff --git a/src/Data/ByteArray/Builder.hs b/src/Data/ByteArray/Builder.hs
index ce49b26..9302292 100644
--- a/src/Data/ByteArray/Builder.hs
+++ b/src/Data/ByteArray/Builder.hs
@@ -51,20 +51,21 @@ module Data.ByteArray.Builder
 import Control.Monad.Primitive
 import Control.Monad.ST
 import Control.Monad.ST.Run (runByteArrayST)
-import Data.Bytes.Types
-import Data.Primitive
-import Data.Int (Int64)
-import GHC.Exts
-import GHC.ST
-import GHC.Word
-import GHC.TypeLits (KnownNat,natVal')
-import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
-import Data.ByteString.Short.Internal (ShortByteString(SBS))
-import Data.Text.Short (ShortText)
-import Data.Char (ord)
 import Data.ByteArray.Builder.Unsafe (Builder(Builder))
 import Data.ByteArray.Builder.Unsafe (stringUtf8,cstring)
+import Data.ByteString.Short.Internal (ShortByteString(SBS))
+import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes))
+import Data.Char (ord)
+import Data.Int (Int64)
+import Data.Primitive
+import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
+import Data.Text.Short (ShortText)
+import GHC.Exts
+import GHC.ST (ST(ST))
+import GHC.Word
 
+import qualified Arithmetic.Nat as Nat
+import qualified Arithmetic.Types as Arithmetic
 import qualified GHC.Exts as Exts
 import qualified Data.Text.Short as TS
 import qualified Data.Primitive as PM
@@ -171,14 +172,19 @@ construct f = Builder
         Nothing -> (# s1, (-1#) #)
         Just (I# n) -> (# s1, n #)
 
--- | Convert a bounded builder to an unbounded one.
-fromBounded :: forall n. KnownNat n => Bounded.Builder n -> Builder
+-- | Convert a bounded builder to an unbounded one. If the size
+-- is a constant, use @Arithmetic.Nat.constant@ as the first argument
+-- to let GHC conjure up this value for you.
+fromBounded ::
+     Arithmetic.Nat n
+  -> Bounded.Builder n
+  -> Builder
 {-# inline fromBounded #-}
-fromBounded (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
-  case fromIntegral (natVal' (proxy# :: Proxy# n)) of
-    I# req -> case len >=# req of
-      1# -> f arr off s0
-      _ -> (# s0, (-1#) #)
+fromBounded n (UnsafeBounded.Builder f) = Builder $ \arr off len s0 ->
+  let !(I# req) = Nat.demote n in
+  case len >=# req of
+    1# -> f arr off s0
+    _ -> (# s0, (-1#) #)
 
 -- | Create a builder from an unsliced byte sequence.
 bytearray :: ByteArray -> Builder
@@ -251,70 +257,70 @@ shortTextJsonString a =
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word64Dec :: Word64 -> Builder
-word64Dec w = fromBounded (Bounded.word64Dec w)
+word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w)
 
 -- | Encodes an unsigned 16-bit integer as decimal.
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word32Dec :: Word32 -> Builder
-word32Dec w = fromBounded (Bounded.word32Dec w)
+word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w)
 
 -- | Encodes an unsigned 16-bit integer as decimal.
 -- This encoding never starts with a zero unless the
 -- argument was zero.
 word16Dec :: Word16 -> Builder
-word16Dec w = fromBounded (Bounded.word16Dec w)
+word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w)
 
 -- | Encode a double-floating-point number, using decimal notation or
 -- scientific notation depending on the magnitude. This has undefined
 -- behavior when representing @+inf@, @-inf@, and @NaN@. It will not
 -- crash, but the generated numbers will be nonsense.
 doubleDec :: Double -> Builder
-doubleDec w = fromBounded (Bounded.doubleDec w)
+doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w)
 
 -- | Encodes a signed 64-bit integer as decimal.
 -- This encoding never starts with a zero unless the argument was zero.
 -- Negative numbers are preceded by a minus sign. Positive numbers
 -- are not preceded by anything.
 int64Dec :: Int64 -> Builder
-int64Dec w = fromBounded (Bounded.int64Dec w)
+int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w)
 
 -- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 16 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @00000000000003FE@.
 word64PaddedUpperHex :: Word64 -> Builder
 word64PaddedUpperHex w =
-  fromBounded (Bounded.word64PaddedUpperHex w)
+  fromBounded Nat.constant (Bounded.word64PaddedUpperHex w)
 
 -- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 8 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @000003FE@.
 word32PaddedUpperHex :: Word32 -> Builder
 word32PaddedUpperHex w =
-  fromBounded (Bounded.word32PaddedUpperHex w)
+  fromBounded Nat.constant (Bounded.word32PaddedUpperHex w)
 
 -- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 4 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 1022 as @03FE@.
 word16PaddedUpperHex :: Word16 -> Builder
 word16PaddedUpperHex w =
-  fromBounded (Bounded.word16PaddedUpperHex w)
+  fromBounded Nat.constant (Bounded.word16PaddedUpperHex w)
 
 -- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding
 -- the encoding to 2 digits. This uses uppercase for the alphabetical
 -- digits. For example, this encodes the number 11 as @0B@.
 word8PaddedUpperHex :: Word8 -> Builder
 word8PaddedUpperHex w =
-  fromBounded (Bounded.word8PaddedUpperHex w)
+  fromBounded Nat.constant (Bounded.word8PaddedUpperHex w)
 
 -- | Encode an ASCII char.
 -- Precondition: Input must be an ASCII character. This is not checked.
 ascii :: Char -> Builder
-ascii c = fromBounded (Bounded.char c)
+ascii c = fromBounded Nat.constant (Bounded.char c)
 
 -- | Encode an UTF8 char. This only uses as much space as is required.
 char :: Char -> Builder
-char c = fromBounded (Bounded.char c)
+char c = fromBounded Nat.constant (Bounded.char c)
 
 unST :: ST s a -> State# s -> (# State# s, a #)
 unST (ST f) = f
@@ -326,20 +332,20 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
 -- | Requires exactly 8 bytes. Dump the octets of a 64-bit
 -- word in a big-endian fashion.
 word64BE :: Word64 -> Builder
-word64BE w = fromBounded (Bounded.word64BE w)
+word64BE w = fromBounded Nat.constant (Bounded.word64BE w)
 
 -- | Requires exactly 4 bytes. Dump the octets of a 32-bit
 -- word in a big-endian fashion.
 word32BE :: Word32 -> Builder
-word32BE w = fromBounded (Bounded.word32BE w)
+word32BE w = fromBounded Nat.constant (Bounded.word32BE w)
 
 -- | Requires exactly 2 bytes. Dump the octets of a 16-bit
 -- word in a big-endian fashion.
 word16BE :: Word16 -> Builder
-word16BE w = fromBounded (Bounded.word16BE w)
+word16BE w = fromBounded Nat.constant (Bounded.word16BE w)
 
 word8 :: Word8 -> Builder
-word8 w = fromBounded (Bounded.word8 w)
+word8 w = fromBounded Nat.constant (Bounded.word8 w)
 
 -- ShortText is already UTF-8 encoded. This is a no-op.
 shortTextToByteArray :: ShortText -> ByteArray
diff --git a/src/Data/ByteArray/Builder/Bounded.hs b/src/Data/ByteArray/Builder/Bounded.hs
index 5db8a12..d4bcb52 100644
--- a/src/Data/ByteArray/Builder/Bounded.hs
+++ b/src/Data/ByteArray/Builder/Bounded.hs
@@ -1,14 +1,14 @@
-{-# language KindSignatures #-}
-{-# language ScopedTypeVariables #-}
 {-# language BangPatterns #-}
-{-# language MagicHash #-}
 {-# language BinaryLiterals #-}
-{-# language UnboxedTuples #-}
-{-# language RankNTypes #-}
-{-# language LambdaCase #-}
-{-# language TypeOperators #-}
 {-# language DataKinds #-}
+{-# language KindSignatures #-}
+{-# language LambdaCase #-}
+{-# language MagicHash #-}
+{-# language RankNTypes #-}
+{-# language ScopedTypeVariables #-}
 {-# language TypeApplications #-}
+{-# language TypeOperators #-}
+{-# language UnboxedTuples #-}
 
 -- | The functions in this module are explict in the amount of bytes they require.
 module Data.ByteArray.Builder.Bounded
@@ -44,35 +44,35 @@ module Data.ByteArray.Builder.Bounded
   , doubleDec
   ) where
 
+import Arithmetic.Types (type (<=), type (:=:))
 import Control.Monad.Primitive
 import Control.Monad.ST
 import Data.Bits
+import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
 import Data.Char (ord)
 import Data.Primitive
-import GHC.Exts
-import GHC.ST (ST(ST))
-import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
-import GHC.Int (Int64(I64#))
-import GHC.TypeLits (KnownNat,type (+),natVal')
 import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
-import Arithmetic.Types (type (<=), type (:=:))
-import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..))
+import GHC.Exts
+import GHC.Int (Int64(I64#))
+import GHC.ST (ST(ST))
+import GHC.TypeLits (type (+))
+import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
 
+import qualified Arithmetic.Types as Arithmetic
+import qualified Arithmetic.Nat as Nat
 import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe
 import qualified Data.Primitive as PM
 
--- Used internally.
-knownNat :: KnownNat n => Proxy# n -> Int
-{-# inline knownNat #-}
-knownNat p = fromIntegral (natVal' p)
-
--- | Execute the bounded builder.
-run :: forall n. KnownNat n
-  => Builder n -- ^ Builder
+-- | Execute the bounded builder. If the size is a constant,
+-- use @Arithmetic.Nat.constant@ as the first argument to let
+-- GHC conjure up this value for you.
+run ::
+     Arithmetic.Nat n
+  -> Builder n -- ^ Builder
   -> ByteArray
 {-# inline run #-}
-run b = runST $ do
-  arr <- newByteArray (knownNat (proxy# :: Proxy# n))
+run n b = runST $ do
+  arr <- newByteArray (Nat.demote n)
   len <- Unsafe.pasteST b arr 0
   shrinkMutableByteArray arr len
   unsafeFreezeByteArray arr
@@ -80,16 +80,17 @@ run b = runST $ do
 -- | Paste the builder into the byte array starting at offset zero.
 -- This reallocates the byte array if it cannot accomodate the builder,
 -- growing it by the minimum amount necessary.
-pasteGrowST :: forall n s. KnownNat n
-  => Builder n
+pasteGrowST ::
+     Arithmetic.Nat n
+  -> Builder n
   -> MutableByteArrayOffset s
      -- ^ Initial buffer, used linearly. Do not reuse this argument.
   -> ST s (MutableByteArrayOffset s)
      -- ^ Final buffer that accomodated the builder.
 {-# inline pasteGrowST #-}
-pasteGrowST b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
+pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do
   sz0 <- PM.getSizeofMutableByteArray arr0
-  let req = knownNat (proxy# :: Proxy# n)
+  let req = Nat.demote n
   let sz1 = off0 + req
   if sz1 <= sz0
     then do
diff --git a/test/Main.hs b/test/Main.hs
index 37b2cb0..6a2e6f8 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -9,12 +9,12 @@ import Data.ByteArray.Builder
 import Data.Word
 import Data.Char (ord)
 import Data.Primitive (ByteArray)
-import Debug.Trace
 import Test.Tasty (defaultMain,testGroup,TestTree)
 import Test.QuickCheck ((===))
 import Text.Printf (printf)
 import Test.Tasty.HUnit ((@=?))
 
+import qualified Arithmetic.Nat as Nat
 import qualified Data.ByteString as ByteString
 import qualified Data.ByteString.Builder as BB
 import qualified Data.ByteString.Lazy.Char8 as LB
@@ -24,7 +24,6 @@ import qualified Data.Text as T
 import qualified Data.Text.Encoding as TE
 import qualified Data.Vector as V
 import qualified GHC.Exts as Exts
-import qualified Test.QuickCheck as QC
 import qualified Test.Tasty.HUnit as THU
 import qualified Test.Tasty.QuickCheck as TQC
 
@@ -95,8 +94,8 @@ tests = testGroup "Tests"
   , testGroup "alternate"
     [ TQC.testProperty "HexWord64" $ \x y ->
         run 1
-          (  fromBounded (HexWord64.word64PaddedUpperHex x)
-          <> fromBounded (HexWord64.word64PaddedUpperHex y)
+          (  fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x)
+          <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y)
           )
         ===
         pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y)