diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..f3f5be4 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,11 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build-matrix.yaml@main + with: + cabal-file: bytebuild.cabal diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..9411962 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,10 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/release.yaml@main + secrets: inherit diff --git a/.gitignore b/.gitignore index 28d589b..ccd94e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev @@ -11,6 +12,7 @@ cabal-dev .hsenv .cabal-sandbox/ cabal.sandbox.config +cabal.project.local *.prof *.aux *.hp diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e6d08..4f03773 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,28 @@ Note: Prior to version 0.3.4.0, this library was named `small-bytearray-builder` is now just a compatibility shim to ease the migration process. -## 0.3.15.0 -- 2023-??-?? +## 0.3.16.2 -- 2024-02-06 + +* Restore import statement for `liftA2` to fix build for GHC 9.4. + +## 0.3.16.1 -- 2024-02-02 + +* Remove all CPP +* Drop support for GHC < 9.4 +* Drop support for text < 2.0 + +## 0.3.16.0 -- 2024-01-29 + +* Add `wordPaddedDec3`. +* Add `Data.Bytes.Builder.Avro`. +* Add `word16LEB128`. +* Stop accepting versions of text lower than 2.0. + +## 0.3.15.0 -- 2024-01-05 * Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need to perform JSON string encoding without using a builder. +* Add `Data.Bytes.Builder.textJsonString` when building with text 2.0+ ## 0.3.14.0 -- 2023-07-20 @@ -127,7 +145,7 @@ to ease the migration process. * Add `flush`, `copy`, and `insert` for better control when converting byte sequences to builders. * Add `shortByteString` to improve interoperability with the - `bytestring` library. + `bytestring` library. ## 0.2.1.0 -- 2019-09-05 diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/bench/Cell.hs b/bench/Cell.hs index 77d4f61..3c76016 100644 --- a/bench/Cell.hs +++ b/bench/Cell.hs @@ -1,14 +1,14 @@ -{-# language OverloadedLists #-} -{-# language OverloadedStrings #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} module Cell - ( Cell(..) + ( Cell (..) , cells ) where -import Data.Word (Word32) -import Data.Text.Short (ShortText) import Data.Primitive (SmallArray) +import Data.Text.Short (ShortText) +import Data.Word (Word32) -- A cell in a CSV file data Cell @@ -18,15 +18,14 @@ data Cell -- Some sample data to encode as a CSV cells :: SmallArray (SmallArray Cell) cells = - [ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ] - , [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ] - , [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ] - , [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ] - , [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ] - , [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ] - , [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ] - , [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ] - , [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ] - , [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ] + [ [CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343] + , [CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453] + , [CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634] + , [CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922] + , [CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617] + , [CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150] + , [CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299] + , [CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214] + , [CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918] + , [CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904] ] - diff --git a/bench/Main.hs b/bench/Main.hs index cc29af6..1c93f57 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -1,9 +1,8 @@ -{-# language LambdaCase #-} -{-# language OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} import Data.Primitive (ByteArray) import Data.Word (Word64) -import Gauge (bgroup,bench,whnf) +import Gauge (bench, bgroup, whnf) import Gauge.Main (defaultMain) import qualified Arithmetic.Nat as Nat @@ -11,70 +10,87 @@ import qualified Data.Bytes.Builder as B import qualified Data.Bytes.Builder.Bounded as U import qualified Cell -import qualified SimpleCsv import qualified HexWord64 +import qualified SimpleCsv import qualified Word16Tree main :: IO () -main = defaultMain - [ bgroup "w64" - [ bgroup "hex" - [ bench "library" (whnf encodeHexWord64s w64s) - , bench "loop" (whnf encodeHexWord64sLoop w64s) - ] +main = + defaultMain + [ bgroup + "w64" + [ bgroup + "hex" + [ bench "library" (whnf encodeHexWord64s w64s) + , bench "loop" (whnf encodeHexWord64sLoop w64s) + ] + ] + , bgroup + "unbounded" + [ bench "csv-no-escape" $ + whnf + (\x -> B.run 4080 (SimpleCsv.encodeRows x)) + Cell.cells + , bench "word-16-tree-small" $ + whnf + (\x -> B.run 4080 (Word16Tree.encode x)) + Word16Tree.exampleSmall + , bench "word-16-tree-2000" $ + whnf + (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) + Word16Tree.example2000 + , bench "word-16-tree-9000" $ + whnf + (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) + Word16Tree.example9000 + ] ] - , bgroup "unbounded" - [ bench "csv-no-escape" $ whnf - (\x -> B.run 4080 (SimpleCsv.encodeRows x)) - Cell.cells - , bench "word-16-tree-small" $ whnf - (\x -> B.run 4080 (Word16Tree.encode x)) - Word16Tree.exampleSmall - , bench "word-16-tree-2000" $ whnf - (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) - Word16Tree.example2000 - , bench "word-16-tree-9000" $ whnf - (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) - Word16Tree.example9000 - ] - ] w64s :: Word64s -w64s = Word64s - 0xde2b8a480cf77113 - 0x48f1668ca2a68b45 - 0xd262fbaa0b2f473c - 0xbab20547f4919d9f - 0xb7ec16121704db43 - 0x9c259f5bfa90e1eb - 0xd451eca11d9873ad - 0xbd927e8d4c879d02 +w64s = + Word64s + 0xde2b8a480cf77113 + 0x48f1668ca2a68b45 + 0xd262fbaa0b2f473c + 0xbab20547f4919d9f + 0xb7ec16121704db43 + 0x9c259f5bfa90e1eb + 0xd451eca11d9873ad + 0xbd927e8d4c879d02 -data Word64s = Word64s - !Word64 !Word64 !Word64 !Word64 - !Word64 !Word64 !Word64 !Word64 +data Word64s + = Word64s + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 encodeHexWord64s :: Word64s -> ByteArray -{-# noinline encodeHexWord64s #-} -encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $ - U.word64PaddedUpperHex a `U.append` - U.word64PaddedUpperHex b `U.append` - U.word64PaddedUpperHex c `U.append` - U.word64PaddedUpperHex d `U.append` - U.word64PaddedUpperHex e `U.append` - U.word64PaddedUpperHex f `U.append` - U.word64PaddedUpperHex g `U.append` - U.word64PaddedUpperHex h +{-# NOINLINE encodeHexWord64s #-} +encodeHexWord64s (Word64s a b c d e f g h) = + U.run Nat.constant $ + U.word64PaddedUpperHex a + `U.append` U.word64PaddedUpperHex b + `U.append` U.word64PaddedUpperHex c + `U.append` U.word64PaddedUpperHex d + `U.append` U.word64PaddedUpperHex e + `U.append` U.word64PaddedUpperHex f + `U.append` U.word64PaddedUpperHex g + `U.append` U.word64PaddedUpperHex h encodeHexWord64sLoop :: Word64s -> ByteArray -{-# noinline encodeHexWord64sLoop #-} -encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $ - HexWord64.word64PaddedUpperHex a `U.append` - HexWord64.word64PaddedUpperHex b `U.append` - HexWord64.word64PaddedUpperHex c `U.append` - HexWord64.word64PaddedUpperHex d `U.append` - HexWord64.word64PaddedUpperHex e `U.append` - HexWord64.word64PaddedUpperHex f `U.append` - HexWord64.word64PaddedUpperHex g `U.append` - HexWord64.word64PaddedUpperHex h - +{-# NOINLINE encodeHexWord64sLoop #-} +encodeHexWord64sLoop (Word64s a b c d e f g h) = + U.run Nat.constant $ + HexWord64.word64PaddedUpperHex a + `U.append` HexWord64.word64PaddedUpperHex b + `U.append` HexWord64.word64PaddedUpperHex c + `U.append` HexWord64.word64PaddedUpperHex d + `U.append` HexWord64.word64PaddedUpperHex e + `U.append` HexWord64.word64PaddedUpperHex f + `U.append` HexWord64.word64PaddedUpperHex g + `U.append` HexWord64.word64PaddedUpperHex h diff --git a/bench/SimpleCsv.hs b/bench/SimpleCsv.hs index 1b47345..34a2ded 100644 --- a/bench/SimpleCsv.hs +++ b/bench/SimpleCsv.hs @@ -1,4 +1,4 @@ -{-# language LambdaCase #-} +{-# LANGUAGE LambdaCase #-} -- A variant of CSV encoding that does not perform -- any escaping or quoting. This is in its own module @@ -8,22 +8,24 @@ module SimpleCsv ( encodeRows ) where -import Cell (Cell(..)) +import Cell (Cell (..)) import Data.Primitive (SmallArray) -import qualified Data.Foldable as F import qualified Data.Bytes.Builder as B +import qualified Data.Foldable as F encodeRows :: SmallArray (SmallArray Cell) -> B.Builder -encodeRows = F.foldr - (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) - mempty +encodeRows = + F.foldr + (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) + mempty encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder -encodeSimpleCsvRow cs b = F.foldr - (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) - b - cs +encodeSimpleCsvRow cs b = + F.foldr + (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) + b + cs encodeSimpleCsvCell :: Cell -> B.Builder encodeSimpleCsvCell = \case diff --git a/bytebuild.cabal b/bytebuild.cabal index 309042d..ffb6cd7 100644 --- a/bytebuild.cabal +++ b/bytebuild.cabal @@ -1,17 +1,17 @@ -cabal-version: 2.2 -name: bytebuild -version: 0.3.15.0 -synopsis: Build byte arrays +cabal-version: 2.2 +name: bytebuild +version: 0.3.16.2 +synopsis: Build byte arrays description: This is similar to the builder facilities provided by `Data.ByteString.Builder`. It is intended to be used in situations where the following apply: . * An individual entity will be serialized as a small - number of bytes (less than 512). + number of bytes (less than 512). . * A large number (more than 32) of entities will be serialized - one after another without anything between them. + one after another without anything between them. . Unlike builders from the `bytestring` package, these builders do not track their state when they run out of space. A builder @@ -19,109 +19,136 @@ description: of the next chunk. This strategy for building is suitable for most CSVs and several line protocols (carbon, InfluxDB, etc.). -homepage: https://github.com/byteverse/bytebuild -bug-reports: https://github.com/byteverse/bytebuild/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2019 Andrew Martin -category: Data -extra-source-files: CHANGELOG.md +homepage: https://github.com/byteverse/bytebuild +bug-reports: https://github.com/byteverse/bytebuild/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2019 Andrew Martin +category: Data +extra-doc-files: CHANGELOG.md +tested-with: GHC ==9.4.8 || ==9.6.5 || ==9.8.2 || ==9.10.1 + +common build-settings + default-language: Haskell2010 + ghc-options: -Wall -Wunused-packages + +flag quoter + manual: True + description: Include the quasiquoter + default: True flag checked - manual: True + manual: True description: Add bounds-checking to primitive array operations - default: False + default: False library + import: build-settings exposed-modules: Data.Bytes.Builder - Data.Bytes.Builder.Class - Data.Bytes.Builder.Template - Data.Bytes.Builder.Unsafe + Data.Bytes.Builder.Avro Data.Bytes.Builder.Bounded Data.Bytes.Builder.Bounded.Class Data.Bytes.Builder.Bounded.Unsafe + Data.Bytes.Builder.Class + Data.Bytes.Builder.Unsafe + other-modules: Compat Op - reexported-modules: - Data.Bytes.Chunks + + reexported-modules: Data.Bytes.Chunks build-depends: - , base >=4.12.0.0 && <4.19 - , byteslice >=0.2.6 && <0.3 - , bytestring >=0.10.8.2 && <0.12 - , haskell-src-meta >=0.8 - , integer-logarithms >=1.0.3 && <1.1 - , natural-arithmetic >=0.1 && <0.3 - , primitive-offset >=0.2 && <0.3 - , run-st >=0.1.2 && <0.2 - , template-haskell >=2.16 - , text >=1.2 && <2.2 - , text-short >=0.1.3 && <0.2 - , wide-word >=0.1.0.9 && <0.2 + , base >=4.17.0.0 && <4.21 + , byteslice >=0.2.6 && <0.3 + , bytestring >=0.10.8.2 && <0.13 + , integer-logarithms >=1.0.3 && <1.1 + , natural-arithmetic >=0.1 && <0.3 + , primitive-offset >=0.2 && <0.3 + , run-st >=0.1.2 && <0.2 + , text >=2.0 && <2.2 + , text-short >=0.1.3 && <0.2 + , wide-word >=0.1.0.9 && <0.2 , zigzag - if impl(ghc >= 9.2) + + if flag(quoter) + build-depends: + haskell-src-meta >=0.8.13 + , template-haskell >=2.16 + exposed-modules: Data.Bytes.Builder.Template + + if impl(ghc >=9.2) hs-source-dirs: src-9.2 + else - if impl(ghc >= 8.10) + if impl(ghc >=8.10) hs-source-dirs: src-9.0 + if flag(checked) - build-depends: primitive-checked >= 0.7 && <0.10 + build-depends: primitive-checked >=0.7 && <0.10 hs-source-dirs: src-checked + else - build-depends: primitive >= 0.7 && <0.10 + build-depends: primitive >=0.7 && <0.10 hs-source-dirs: src-unchecked - ghc-options: -Wall -O2 - hs-source-dirs: src - default-language: Haskell2010 - c-sources: cbits/bytebuild_custom.c + + ghc-options: -O2 + hs-source-dirs: src + c-sources: cbits/bytebuild_custom.c test-suite test - default-language: Haskell2010 - type: exitcode-stdio-1.0 - hs-source-dirs: test, common - main-is: Main.hs - ghc-options: -O2 -Wall + import: build-settings + type: exitcode-stdio-1.0 + hs-source-dirs: test common + main-is: Main.hs other-modules: HexWord64 Word16Tree + build-depends: - , QuickCheck >=2.13.1 && <2.15 - , base >=4.12.0.0 && <5 + , base >=4.12.0.0 && <5 , bytebuild , byteslice , bytestring , natural-arithmetic , primitive - , primitive-unlifted >=0.1.2 - , quickcheck-classes >=0.6.4 - , quickcheck-instances >=0.3.22 - , text-short - , tasty >=1.2.3 && <1.6 - , tasty-hunit >=0.10.0.2 && <0.11 - , tasty-quickcheck >=0.10.1 && <0.11 - , text >=1.2 && <2.2 - , vector - , wide-word >=0.1.0.9 && <0.2 + , QuickCheck >=2.13.1 && <2.16 + , quickcheck-instances >=0.3.22 + , tasty >=1.2.3 && <1.6 + , tasty-hunit >=0.10.0.2 && <0.11 + , tasty-quickcheck >=0.10.1 && <0.11 + , text >=2.0 && <2.2 + , wide-word >=0.1.0.9 && <0.2 + + default-extensions: CPP + + if flag(quoter) + cpp-options: -DQUOTER + build-depends: text-short benchmark bench - type: exitcode-stdio-1.0 + import: build-settings + type: exitcode-stdio-1.0 build-depends: , base , bytebuild - , gauge >= 0.2.4 + , byteslice + , gauge >=0.2.4 , natural-arithmetic , primitive , text-short - , byteslice - ghc-options: -Wall -O2 - default-language: Haskell2010 - hs-source-dirs: bench, common - main-is: Main.hs + + ghc-options: -O2 + hs-source-dirs: bench common + main-is: Main.hs other-modules: Cell HexWord64 SimpleCsv Word16Tree + +source-repository head + type: git + location: git://github.com/byteverse/bytebuild.git diff --git a/common/HexWord64.hs b/common/HexWord64.hs index b3af3c8..23acfe4 100644 --- a/common/HexWord64.hs +++ b/common/HexWord64.hs @@ -1,10 +1,10 @@ -{-# language BangPatterns #-} -{-# language ScopedTypeVariables #-} -{-# language DataKinds #-} -{-# language UnboxedTuples #-} -{-# language MagicHash #-} -{-# language PolyKinds #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} module HexWord64 ( word64PaddedUpperHex @@ -15,34 +15,37 @@ module HexWord64 -- the hoop jumping, the explicit loop used here is still outperformed -- by just inlining the loop. -import GHC.ST (ST(ST)) import Data.Bits -import Data.Bytes.Builder.Bounded.Unsafe (Builder,construct) +import Data.Bytes.Builder.Bounded.Unsafe (Builder, construct) import Data.Primitive import Data.Word import GHC.Exts +import GHC.ST (ST (ST)) import qualified Control.Monad.Primitive as PM type ST# s (a :: TYPE (r :: RuntimeRep)) = State# s -> (# State# s, a #) word64PaddedUpperHex :: Word64 -> Builder 16 -word64PaddedUpperHex w = construct $ \a b -> ST - (\s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of - (# s1, i #) -> (# s1, I# i #) - ) +word64PaddedUpperHex w = construct $ \a b -> + ST + ( \s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of + (# s1, i #) -> (# s1, I# i #) + ) word64PaddedUpperHexLoop :: forall s. Word64 -> Int -> MutableByteArray s -> Int -> ST# s Int# -word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = if shiftAmount >= 0 - then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of - (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 - else (# s0, i# #) +word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = + if shiftAmount >= 0 + then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of + (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 + else (# s0, i# #) toHexUpper :: Word64 -> Word8 -toHexUpper w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 diff --git a/common/Word16Tree.hs b/common/Word16Tree.hs index 2163a5c..a07d056 100644 --- a/common/Word16Tree.hs +++ b/common/Word16Tree.hs @@ -1,4 +1,4 @@ -{-# language BangPatterns #-} +{-# LANGUAGE BangPatterns #-} module Word16Tree ( Word16Tree @@ -9,11 +9,11 @@ module Word16Tree , expectedSmall ) where -import Data.Bytes.Builder as B -import Data.Word (Word16) -import Data.Primitive (ByteArray) import qualified Data.Bytes as Bytes +import Data.Bytes.Builder as B import qualified Data.Bytes.Text.Ascii +import Data.Primitive (ByteArray) +import Data.Word (Word16) data Word16Tree = Branch !Word16Tree !Word16Tree @@ -23,63 +23,62 @@ encode :: Word16Tree -> Builder encode (Leaf w) = B.word16PaddedUpperHex w encode (Branch a b) = B.ascii '(' - <> - encode a - <> - B.ascii ',' - <> - encode b - <> - B.ascii ')' + <> encode a + <> B.ascii ',' + <> encode b + <> B.ascii ')' expectedSmall :: ByteArray -expectedSmall = Bytes.toByteArray $ Data.Bytes.Text.Ascii.fromString - "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" - +expectedSmall = + Bytes.toByteArray $ + Data.Bytes.Text.Ascii.fromString + "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" exampleSmall :: Word16Tree -exampleSmall = Branch - (Branch - (Leaf 0xAB59) - (Branch - (Leaf 0x1F33) - (Leaf 0x2E71) - ) - ) - (Branch - (Branch - (Branch - (Branch - (Leaf 0xFA9A) - (Leaf 0x247B) +exampleSmall = + Branch + ( Branch + (Leaf 0xAB59) + ( Branch + (Leaf 0x1F33) + (Leaf 0x2E71) ) - (Leaf 0x890C) - ) - (Branch - (Leaf 0x0F13) - (Branch - (Branch - (Leaf 0x55BF) - (Leaf 0x7CF1) - ) - (Leaf 0x389B) - ) - ) ) - (Leaf 0x1205) - ) + ( Branch + ( Branch + ( Branch + ( Branch + (Leaf 0xFA9A) + (Leaf 0x247B) + ) + (Leaf 0x890C) + ) + ( Branch + (Leaf 0x0F13) + ( Branch + ( Branch + (Leaf 0x55BF) + (Leaf 0x7CF1) + ) + (Leaf 0x389B) + ) + ) + ) + (Leaf 0x1205) + ) example2000 :: Word16Tree -{-# noinline example2000 #-} +{-# NOINLINE example2000 #-} example2000 = balanced 0 2000 example9000 :: Word16Tree -{-# noinline example9000 #-} +{-# NOINLINE example9000 #-} example9000 = balanced 0 9000 balanced :: Word16 -> Word16 -> Word16Tree balanced !off !n | n == 0 = Leaf off | n == 1 = Leaf (off + 1) - | otherwise = let x = div n 2 in - Branch (balanced off x) (balanced (off + x) (n - x)) + | otherwise = + let x = div n 2 + in Branch (balanced off x) (balanced (off + x) (n - x)) diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src-9.0/Compat.hs b/src-9.0/Compat.hs index b7f978a..bf82334 100644 --- a/src-9.0/Compat.hs +++ b/src-9.0/Compat.hs @@ -1,4 +1,4 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} -- This is actually used with both GHC 8.10 and with GHC 9.0. -- The name of the directory is a little misleading. @@ -14,40 +14,40 @@ module Compat , word32ToWord# ) where -import GHC.Exts (Int#,Word#) +import GHC.Exts (Int#, Word#) int8ToInt# :: Int# -> Int# -{-# inline int8ToInt# #-} +{-# INLINE int8ToInt# #-} int8ToInt# x = x int16ToInt# :: Int# -> Int# -{-# inline int16ToInt# #-} +{-# INLINE int16ToInt# #-} int16ToInt# x = x int32ToInt# :: Int# -> Int# -{-# inline int32ToInt# #-} +{-# INLINE int32ToInt# #-} int32ToInt# x = x wordToWord8# :: Word# -> Word# -{-# inline wordToWord8# #-} -wordToWord8# x = x +{-# INLINE wordToWord8# #-} +wordToWord8# x = x wordToWord16# :: Word# -> Word# -{-# inline wordToWord16# #-} +{-# INLINE wordToWord16# #-} wordToWord16# x = x wordToWord32# :: Word# -> Word# -{-# inline wordToWord32# #-} +{-# INLINE wordToWord32# #-} wordToWord32# x = x word8ToWord# :: Word# -> Word# -{-# inline word8ToWord# #-} +{-# INLINE word8ToWord# #-} word8ToWord# x = x word16ToWord# :: Word# -> Word# -{-# inline word16ToWord# #-} +{-# INLINE word16ToWord# #-} word16ToWord# x = x word32ToWord# :: Word# -> Word# -{-# inline word32ToWord# #-} +{-# INLINE word32ToWord# #-} word32ToWord# x = x diff --git a/src-9.2/Compat.hs b/src-9.2/Compat.hs index 177c94c..fb5a93b 100644 --- a/src-9.2/Compat.hs +++ b/src-9.2/Compat.hs @@ -1,10 +1,10 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} -module Compat +module Compat ( int8ToInt# , int16ToInt# , int32ToInt# - , wordToWord8# + , wordToWord8# , wordToWord16# , wordToWord32# , word8ToWord# diff --git a/src-checked/Op.hs b/src-checked/Op.hs index a9b4211..d41684d 100644 --- a/src-checked/Op.hs +++ b/src-checked/Op.hs @@ -1,5 +1,5 @@ -{-# language MagicHash #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} module Op ( writeCharArray# @@ -7,9 +7,9 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts ((<#),(>=#),State#,Int#,MutableByteArray#,ByteArray#,Char#) -import GHC.Int (Int(I#)) +import GHC.Exts (ByteArray#, Char#, Int#, MutableByteArray#, State#, (<#), (>=#)) import qualified GHC.Exts as Exts +import GHC.Int (Int (I#)) writeCharArray# :: MutableByteArray# s -> Int# -> Char# -> State# s -> State# s writeCharArray# arr i v st = case i <# 0# of @@ -27,8 +27,8 @@ copyByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# sz - , I# soff + I# len <= I# (Exts.sizeofByteArray# src) - -> Exts.copyByteArray# src soff dst doff len s1 + , I# soff + I# len <= I# (Exts.sizeofByteArray# src) -> + Exts.copyByteArray# src soff dst doff len s1 | otherwise -> error "copyByteArray#: index range out of bounds" copyMutableByteArray# :: MutableByteArray# s -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s @@ -40,6 +40,6 @@ copyMutableByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# szDst - , I# soff + I# len <= I# szSrc - -> Exts.copyMutableByteArray# src soff dst doff len s2 + , I# soff + I# len <= I# szSrc -> + Exts.copyMutableByteArray# src soff dst doff len s2 | otherwise -> error "copyMutableByteArray#: index range out of bounds" diff --git a/src-unchecked/Op.hs b/src-unchecked/Op.hs index 32874e2..9092c35 100644 --- a/src-unchecked/Op.hs +++ b/src-unchecked/Op.hs @@ -1,4 +1,4 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} module Op ( writeCharArray# @@ -6,4 +6,4 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts (copyMutableByteArray#,writeCharArray#,copyByteArray#,copyMutableByteArray#) +import GHC.Exts (copyByteArray#, copyMutableByteArray#, writeCharArray#) diff --git a/src/Data/Bytes/Builder.hs b/src/Data/Bytes/Builder.hs index 7d15de7..1a89a4a 100644 --- a/src/Data/Bytes/Builder.hs +++ b/src/Data/Bytes/Builder.hs @@ -1,19 +1,18 @@ -{-# language CPP #-} -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language NumericUnderscores #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder ( -- * Bounded Primitives Builder , fromBounded + -- * Evaluation , run , runOnto @@ -21,6 +20,7 @@ module Data.Bytes.Builder , reversedOnto , putMany , putManyConsLength + -- * Materialized Byte Sequences , bytes , chunks @@ -30,19 +30,21 @@ module Data.Bytes.Builder , insert , byteArray , shortByteString -#if MIN_VERSION_text(2,0,0) , textUtf8 -#endif + , textJsonString , shortTextUtf8 , shortTextJsonString , cstring , cstring# , cstringLen , stringUtf8 + -- * Byte Sequence Encodings , sevenEightRight , sevenEightSmile + -- * Encode Integral Types + -- ** Human-Readable , word64Dec , word32Dec @@ -56,16 +58,21 @@ module Data.Bytes.Builder , int8Dec , intDec , integerDec + -- * Unsigned Words + -- ** 64-bit , word64PaddedUpperHex + -- ** 32-bit , word32PaddedUpperHex + -- ** 16-bit , word16PaddedUpperHex , word16PaddedLowerHex , word16LowerHex , word16UpperHex + -- ** 8-bit , word8PaddedUpperHex , word8LowerHex @@ -78,9 +85,12 @@ module Data.Bytes.Builder , ascii7 , ascii8 , char + -- ** Machine-Readable + -- *** One , word8 + -- **** Big Endian , word256BE , word128BE @@ -90,6 +100,7 @@ module Data.Bytes.Builder , int64BE , int32BE , int16BE + -- **** Little Endian , word256LE , word128LE @@ -99,19 +110,24 @@ module Data.Bytes.Builder , int64LE , int32LE , int16LE + -- **** LEB128 , intLEB128 , int32LEB128 , int64LEB128 , wordLEB128 + , word16LEB128 , word32LEB128 , word64LEB128 + -- **** VLQ , wordVlq , word32Vlq , word64Vlq + -- *** Many , word8Array + -- **** Big Endian , word16ArrayBE , word32ArrayBE @@ -121,6 +137,7 @@ module Data.Bytes.Builder , int64ArrayBE , int32ArrayBE , int16ArrayBE + -- **** Little Endian , word16ArrayLE , word32ArrayLE @@ -130,56 +147,67 @@ module Data.Bytes.Builder , int64ArrayLE , int32ArrayLE , int16ArrayLE + -- ** Prefixing with Length , consLength , consLength32LE , consLength32BE , consLength64BE + -- * Encode Floating-Point Types + -- ** Human-Readable , doubleDec + -- * Replication , replicate + -- * Control , flush + -- * Rebuild , rebuild ) where import Prelude hiding (replicate) -import Control.Exception (SomeException,toException) -import Control.Monad.IO.Class (MonadIO,liftIO) -import Control.Monad.ST (ST,runST) -import Data.Bits ((.&.),(.|.),unsafeShiftL,unsafeShiftR) -import Data.Bytes.Builder.Unsafe (addCommitsLength,copyReverseCommits) -import Data.Bytes.Builder.Unsafe (Builder(Builder),commitDistance1) -import Data.Bytes.Builder.Unsafe (BuilderState(BuilderState),pasteIO) -import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable)) -import Data.Bytes.Builder.Unsafe (commitsOntoChunks) -import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks) -import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect) -import Data.Bytes.Builder.Unsafe (pasteUtf8TextJson#) -import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil)) -import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes)) -import Data.ByteString.Short.Internal (ShortByteString(SBS)) +import Control.Exception (SomeException, toException) +import Control.Monad.IO.Class (MonadIO, liftIO) +import Control.Monad.ST (ST, runST) +import Data.Bits (unsafeShiftL, unsafeShiftR, (.&.), (.|.)) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) +import Data.Bytes.Builder.Unsafe + ( Builder (Builder) + , BuilderState (BuilderState) + , Commits (Immutable, Initial, Mutable) + , addCommitsLength + , commitDistance1 + , commitsOntoChunks + , copyReverseCommits + , cstring + , fromEffect + , pasteIO + , pasteUtf8TextJson# + , reverseCommitsOntoChunks + , stringUtf8 + ) +import Data.Bytes.Chunks (Chunks (ChunksCons, ChunksNil)) +import Data.Bytes.Types (Bytes (Bytes), MutableBytes (MutableBytes)) import Data.Foldable (foldlM) -import Data.Int (Int64,Int32,Int16,Int8) -import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Primitive (ByteArray (..), MutableByteArray (..), PrimArray (..)) import Data.Text.Short (ShortText) -import Data.WideWord (Word128,Word256) -import Data.Word (Word64,Word32,Word16,Word8) -import Data.Word.Zigzag (toZigzagNative,toZigzag32,toZigzag64) +import Data.WideWord (Word128, Word256) +import Data.Word (Word16, Word32, Word64, Word8) +import Data.Word.Zigzag (toZigzag32, toZigzag64, toZigzagNative) import Foreign.C.String (CStringLen) -import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder) -import GHC.Exts (MutableByteArray#,Addr#,(*#),oneShot) -import GHC.Exts (Int(I#),Int#,State#,ByteArray#,(>=#)) -import GHC.Exts (RealWorld,(+#),(-#),(<#)) +import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) +import GHC.Exts (Addr#, ByteArray#, Int (I#), Int#, MutableByteArray#, RealWorld, State#, oneShot, (*#), (+#), (-#), (<#), (>=#)) +import GHC.IO (IO (IO), stToIO) import GHC.Integer.Logarithms.Compat (integerLog2#) -import GHC.IO (IO(IO),stToIO) -import GHC.Natural (naturalFromInteger,naturalToInteger) -import GHC.ST (ST(ST)) -import GHC.Word (Word(W#),Word8(W8#)) +import GHC.Natural (naturalFromInteger, naturalToInteger) +import GHC.ST (ST (ST)) +import GHC.Word (Word (W#), Word8 (W8#)) import Numeric.Natural (Natural) import qualified Compat as C @@ -194,186 +222,233 @@ import qualified Data.Text.Short as TS import qualified GHC.Exts as Exts import qualified Op as Op -#if MIN_VERSION_text(2,0,0) import Data.Text (Text) -import qualified Data.Text.Internal as I import qualified Data.Text.Array as A -#endif +import qualified Data.Text.Internal as I -- | Run a builder. run :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + Chunks run !hint bldr = runOnto hint bldr ChunksNil --- | Run a builder. The resulting chunks are consed onto the --- beginning of an existing sequence of chunks. +{- | Run a builder. The resulting chunks are consed onto the +beginning of an existing sequence of chunks. +-} runOnto :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks -- ^ Suffix - -> Chunks -runOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + -- | Suffix + Chunks -> + Chunks +runOnto hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) reverseCommitsOntoChunks cs0 cs --- | Variant of 'runOnto' that additionally returns the number of bytes --- consed onto the suffix. +{- | Variant of 'runOnto' that additionally returns the number of bytes +consed onto the suffix. +-} runOntoLength :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks -- ^ Suffix - -> (Int,Chunks) -runOntoLength hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + -- | Suffix + Chunks -> + (Int, Chunks) +runOntoLength hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) let !n = addCommitsLength 0 cs ch <- reverseCommitsOntoChunks cs0 cs - pure (n,ch) + pure (n, ch) --- | Variant of 'runOnto' that conses the additional chunks --- in reverse order. +{- | Variant of 'runOnto' that conses the additional chunks +in reverse order. +-} reversedOnto :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks - -> Chunks -reversedOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + Chunks -> + Chunks +reversedOnto hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) commitsOntoChunks cs0 cs --- | Run a builder against lots of elements. This fills the same --- underlying buffer over and over again. Do not let the argument to --- the callback escape from the callback (i.e. do not write it to an --- @IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable --- byte arrays in the callback. The intent is that the callback will --- write the buffer out. -putMany :: Foldable f - => Int -- ^ Size of shared chunk (use 8176 if uncertain) - -> (a -> Builder) -- ^ Value builder - -> f a -- ^ Collection of values - -> (MutableBytes RealWorld -> IO b) -- ^ Consume chunks. - -> IO () -{-# inline putMany #-} +{- | Run a builder against lots of elements. This fills the same +underlying buffer over and over again. Do not let the argument to +the callback escape from the callback (i.e. do not write it to an +@IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable +byte arrays in the callback. The intent is that the callback will +write the buffer out. +-} +putMany :: + (Foldable f) => + -- | Size of shared chunk (use 8176 if uncertain) + Int -> + -- | Value builder + (a -> Builder) -> + -- | Collection of values + f a -> + -- | Consume chunks. + (MutableBytes RealWorld -> IO b) -> + IO () +{-# INLINE putMany #-} putMany hint0 g xs cb = do MutableByteArray buf0 <- PM.newByteArray hint - BuilderState bufZ offZ _ cmtsZ <- foldlM - (\st0 a -> do - st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 - case cmts of - Initial -> if I# off < threshold - then pure st1 - else do - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 0# hint# Initial) - _ -> do - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- PM.newByteArray total - stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) - r <- stToIO (copyReverseCommits large doff0 cmts) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 0# hint# Initial) - _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) - ) (BuilderState buf0 0# hint# Initial) xs + BuilderState bufZ offZ _ cmtsZ <- + foldlM + ( \st0 a -> do + st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 + case cmts of + Initial -> + if I# off < threshold + then pure st1 + else do + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 0# hint# Initial) + _ -> do + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- PM.newByteArray total + stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) + r <- stToIO (copyReverseCommits large doff0 cmts) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 0# hint# Initial) + _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) + ) + (BuilderState buf0 0# hint# Initial) + xs _ <- case cmtsZ of Initial -> cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) pure () - where + where !hint@(I# hint#) = max hint0 8 !threshold = div (hint * 3) 4 putManyError :: SomeException -{-# noinline putManyError #-} -putManyError = toException - (userError "bytebuild: putMany implementation error") +{-# NOINLINE putManyError #-} +putManyError = + toException + (userError "bytebuild: putMany implementation error") --- | Variant of 'putMany' that prefixes each pushed array of chunks --- with the number of bytes that the chunks in each batch required. --- (This excludes the bytes required to encode the length itself.) --- This is useful for chunked HTTP encoding. -putManyConsLength :: (Foldable f, MonadIO m) - => Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length - -> (Int -> Bounded.Builder n) -- ^ Length serialization function - -> Int -- ^ Size of shared chunk (use 8176 if uncertain) - -> (a -> Builder) -- ^ Value builder - -> f a -- ^ Collection of values - -> (MutableBytes RealWorld -> m b) -- ^ Consume chunks. - -> m () -{-# inline putManyConsLength #-} +{- | Variant of 'putMany' that prefixes each pushed array of chunks +with the number of bytes that the chunks in each batch required. +(This excludes the bytes required to encode the length itself.) +This is useful for chunked HTTP encoding. +-} +putManyConsLength :: + (Foldable f, MonadIO m) => + -- | Number of bytes used by the serialization of the length + Arithmetic.Nat n -> + -- | Length serialization function + (Int -> Bounded.Builder n) -> + -- | Size of shared chunk (use 8176 if uncertain) + Int -> + -- | Value builder + (a -> Builder) -> + -- | Collection of values + f a -> + -- | Consume chunks. + (MutableBytes RealWorld -> m b) -> + m () +{-# INLINE putManyConsLength #-} putManyConsLength n buildSize hint g xs cb = do - let !(I# n# ) = Nat.demote n - let !(I# actual# ) = max hint (I# n# ) + let !(I# n#) = Nat.demote n + let !(I# actual#) = max hint (I# n#) let !threshold = div (I# actual# * 3) 4 - MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual# )) - BuilderState bufZ offZ _ cmtsZ <- foldlM - (\st0 a -> do - st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) - case cmts of - Initial -> if I# off < threshold - then pure st1 - else do - let !dist = off -# n# - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) 0 - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 n# (actual# -# n# ) Initial) - _ -> do - let !dist = commitDistance1 buf0 n# buf off cmts - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) 0 - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- liftIO (PM.newByteArray total) - liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) - r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 n# (actual# -# n# ) Initial) - _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) - ) (BuilderState buf0 n# (actual# -# n# ) Initial) xs + MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual#)) + BuilderState bufZ offZ _ cmtsZ <- + foldlM + ( \st0 a -> do + st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) + case cmts of + Initial -> + if I# off < threshold + then pure st1 + else do + let !dist = off -# n# + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) + 0 + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 n# (actual# -# n#) Initial) + _ -> do + let !dist = commitDistance1 buf0 n# buf off cmts + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) + 0 + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- liftIO (PM.newByteArray total) + liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) + r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 n# (actual# -# n#) Initial) + _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) + ) + (BuilderState buf0 n# (actual# -# n#) Initial) + xs _ <- case cmtsZ of Initial -> do let !distZ = offZ -# n# - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# distZ))) - (MutableByteArray buf0) - 0 + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# distZ))) + (MutableByteArray buf0) + 0 cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) pure () --- | Convert a bounded builder to an unbounded one. If the size --- is a constant, use @Arithmetic.Nat.constant@ as the first argument --- to let GHC conjure up this value for you. +{- | Convert a bounded builder to an unbounded one. If the size +is a constant, use @Arithmetic.Nat.constant@ as the first argument +to let GHC conjure up this value for you. +-} fromBounded :: - Arithmetic.Nat n - -> Bounded.Builder n - -> Builder -{-# inline fromBounded #-} + Arithmetic.Nat n -> + Bounded.Builder n -> + Builder +{-# INLINE fromBounded #-} fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(I# req) = Nat.demote n !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -382,9 +457,9 @@ fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- Use this instead of fromBounded (where possible) leads to marginally -- better results in benchmarks. fromBoundedOne :: - Bounded.Builder 1 - -> Builder -{-# inline fromBoundedOne #-} + Bounded.Builder 1 -> + Builder +{-# INLINE fromBoundedOne #-} fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 of 0# -> case Exts.newByteArray# 4080# s0 of @@ -401,31 +476,35 @@ byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a)) -- | Create a builder from a short bytestring. Implemented with 'bytes'. shortByteString :: ShortByteString -> Builder shortByteString (SBS x) = bytes (Bytes a 0 (PM.sizeofByteArray a)) - where a = ByteArray x + where + a = ByteArray x --- | Create a builder from a sliced byte sequence. The variants --- 'copy' and 'insert' provide more control over whether or not --- the byte sequence is copied or aliased. This function is preferred --- when the user does not know the size of the byte sequence. +{- | Create a builder from a sliced byte sequence. The variants +'copy' and 'insert' provide more control over whether or not +the byte sequence is copied or aliased. This function is preferred +when the user does not know the size of the byte sequence. +-} bytes :: Bytes -> Builder -bytes (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - -- There are three cases to consider: (1) there is not enough - -- space and (1a) the chunk is not small or (1b) the chunk is - -- small; (2) There is enough space for a copy. - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case slen# >=# 256# of - 1# -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - _ -> case Exts.newByteArray# 4080# s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) - _ -> let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) +bytes (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + -- There are three cases to consider: (1) there is not enough + -- space and (1a) the chunk is not small or (1b) the chunk is + -- small; (2) There is enough space for a copy. + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case slen# >=# 256# of + 1# -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + _ -> case Exts.newByteArray# 4080# s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) -- | Paste byte chunks into a builder. chunks :: Chunks -> Builder -{-# noinline chunks #-} +{-# NOINLINE chunks #-} chunks xs0 = -- Implementation note: It would probably be good to begin with a -- goCopying phase before switching to goInserting. If the total @@ -434,8 +513,8 @@ chunks xs0 = -- Note: This function needs a test in the test suite. Builder $ \buf0 off0 len0 cs0 s0 -> case xs0 of ChunksNil -> (# s0, buf0, off0, len0, cs0 #) - ChunksCons{} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 - where + ChunksCons {} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 + where -- Notice that goNoncopying does not take a buffer as an argument. At the -- very end, we create a 128-byte buffer with nothing in it and present -- that as the new buffer. We *cannot* simply reuse the old buffer with @@ -446,74 +525,84 @@ chunks xs0 = goInserting (ChunksCons (Bytes (ByteArray b) (I# off) (I# len)) ys) !cs s0 = goInserting ys (Immutable b off len cs) s0 --- | Create a builder from a byte sequence. This always results in a --- call to @memcpy@. This is beneficial when the byte sequence is --- known to be small (less than 256 bytes). +{- | Create a builder from a byte sequence. This always results in a +call to @memcpy@. This is beneficial when the byte sequence is +known to be small (less than 256 bytes). +-} copy :: Bytes -> Builder -copy (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +copy (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 --- | Variant of 'copy' that additionally pastes an extra byte in --- front of the bytes. +{- | Variant of 'copy' that additionally pastes an extra byte in +front of the bytes. +-} copyCons :: Word8 -> Bytes -> Builder -copyCons (W8# w0) (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of - s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of - s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 - !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 - in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) - ) - where +copyCons (W8# w0) (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of + s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of + s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 + !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 + in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) + ) + where !(I# newSz) = max ((I# slen#) + 1) 4080 cstring# :: Addr# -> Builder -{-# inline cstring# #-} +{-# INLINE cstring# #-} cstring# x = cstring (Exts.Ptr x) --- | Create a builder from a C string with explicit length. The builder --- must be executed before the C string is freed. +{- | Create a builder from a C string with explicit length. The builder +must be executed before the C string is freed. +-} cstringLen :: CStringLen -> Builder -cstringLen (Exts.Ptr src#, I# slen# ) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +cstringLen (Exts.Ptr src#, I# slen#) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 --- | Encode seven bytes into eight so that the encoded form is eight-bit clean. --- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest --- index byte, most-to-least significant bit within a byte), pads the last group --- with trailing zeros, and forms octects by prepending a zero to each group. --- --- The name was chosen because this pads the input bits with zeros on the right, --- and also because this was likely the originally-indended behavior of the --- SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a --- multiple of seven, as in this variant, is consistent with base64 encodings --- (which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). +{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +index byte, most-to-least significant bit within a byte), pads the last group +with trailing zeros, and forms octects by prepending a zero to each group. + +The name was chosen because this pads the input bits with zeros on the right, +and also because this was likely the originally-indended behavior of the +SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a +multiple of seven, as in this variant, is consistent with base64 encodings +(which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). +-} sevenEightRight :: Bytes -> Builder sevenEightRight bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !_ | nBits <= 0 = mempty go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -522,28 +611,29 @@ sevenEightRight bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) --- | Encode seven bytes into eight so that the encoded form is eight-bit clean. --- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest --- index byte, most-to-least significant bit within a byte), then pad each group --- with zeros on the left until each group is an octet. --- --- The name was chosen because this is the implementation that is used (probably --- unintentionally) in the reference SMILE implementation, and so is expected tp --- be accepted by existing SMILE consumers. +{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +index byte, most-to-least significant bit within a byte), then pad each group +with zeros on the left until each group is an octet. + +The name was chosen because this is the implementation that is used (probably +unintentionally) in the reference SMILE implementation, and so is expected tp +be accepted by existing SMILE consumers. +-} sevenEightSmile :: Bytes -> Builder sevenEightSmile bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !w | nBits == 0 = mempty | nBits < 7 = go 7 (unsafeShiftR w (7 - nBits)) go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -552,44 +642,51 @@ sevenEightSmile bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) --- | Create a builder from two byte sequences. This always results in two --- calls to @memcpy@. This is beneficial when the byte sequences are --- known to be small (less than 256 bytes). +{- | Create a builder from two byte sequences. This always results in two +calls to @memcpy@. This is beneficial when the byte sequences are +known to be small (less than 256 bytes). +-} copy2 :: Bytes -> Bytes -> Builder -copy2 (Bytes (ByteArray srcA# ) (I# soffA# ) (I# slenA# )) - (Bytes (ByteArray srcB# ) (I# soffB# ) (I# slenB# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of - s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of - s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 - !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA# ) slenB# s1 in - (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where - !slen# = slenA# +# slenB# - !(I# newSz) = max (I# slen#) 4080 +copy2 + (Bytes (ByteArray srcA#) (I# soffA#) (I# slenA#)) + (Bytes (ByteArray srcB#) (I# soffB#) (I# slenB#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of + s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of + s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 + !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA#) slenB# s1 + in (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where + !slen# = slenA# +# slenB# + !(I# newSz) = max (I# slen#) 4080 --- | Create a builder from a byte sequence. This never calls @memcpy@. --- Instead, it pushes a chunk that references the argument byte sequence. --- This wastes the remaining space in the active chunk, so it may adversely --- affect performance if used carelessly. See 'flush' for a way to mitigate --- this problem. This functions is most beneficial when the byte sequence --- is known to be large (more than 8192 bytes). +{- | Create a builder from a byte sequence. This never calls @memcpy@. +Instead, it pushes a chunk that references the argument byte sequence. +This wastes the remaining space in the active chunk, so it may adversely +affect performance if used carelessly. See 'flush' for a way to mitigate +this problem. This functions is most beneficial when the byte sequence +is known to be large (more than 8192 bytes). +-} insert :: Bytes -> Builder -insert (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> - (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - ) +insert (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> + (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + ) --- | Create a builder from a slice of an array of 'Word8'. There is the same --- as 'bytes' but is provided as a convenience for users working with different --- types. +{- | Create a builder from a slice of an array of 'Word8'. There is the same +as 'bytes' but is provided as a convenience for users working with different +types. +-} word8Array :: PrimArray Word8 -> Int -> Int -> Builder word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len) @@ -664,129 +761,134 @@ word16ArrayBE src@(PrimArray arr) soff0 slen0 = case targetByteOrder of word16ArraySwap :: PrimArray Word16 -> Int -> Int -> Builder word16ArraySwap src soff0 slen0 = fromFunction (slen0 * 2) (go (soff0 * 2) ((soff0 + slen0) * 2)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - PM.writeByteArray dst doff v1 - PM.writeByteArray dst (doff + 1) v0 - go (soff + 2) send dst (doff + 2) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + PM.writeByteArray dst doff v1 + PM.writeByteArray dst (doff + 1) v0 + go (soff + 2) send dst (doff + 2) + else pure doff word32ArraySwap :: PrimArray Word32 -> Int -> Int -> Builder word32ArraySwap src soff0 slen0 = fromFunction (slen0 * 4) (go (soff0 * 4) ((soff0 + slen0) * 4)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - PM.writeByteArray dst doff v3 - PM.writeByteArray dst (doff + 1) v2 - PM.writeByteArray dst (doff + 2) v1 - PM.writeByteArray dst (doff + 3) v0 - go (soff + 4) send dst (doff + 4) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + PM.writeByteArray dst doff v3 + PM.writeByteArray dst (doff + 1) v2 + PM.writeByteArray dst (doff + 2) v1 + PM.writeByteArray dst (doff + 3) v0 + go (soff + 4) send dst (doff + 4) + else pure doff word64ArraySwap :: PrimArray Word64 -> Int -> Int -> Builder word64ArraySwap src soff0 slen0 = fromFunction (slen0 * 8) (go (soff0 * 8) ((soff0 + slen0) * 8)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - PM.writeByteArray dst doff v7 - PM.writeByteArray dst (doff + 1) v6 - PM.writeByteArray dst (doff + 2) v5 - PM.writeByteArray dst (doff + 3) v4 - PM.writeByteArray dst (doff + 4) v3 - PM.writeByteArray dst (doff + 5) v2 - PM.writeByteArray dst (doff + 6) v1 - PM.writeByteArray dst (doff + 7) v0 - go (soff + 8) send dst (doff + 8) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + PM.writeByteArray dst doff v7 + PM.writeByteArray dst (doff + 1) v6 + PM.writeByteArray dst (doff + 2) v5 + PM.writeByteArray dst (doff + 3) v4 + PM.writeByteArray dst (doff + 4) v3 + PM.writeByteArray dst (doff + 5) v2 + PM.writeByteArray dst (doff + 6) v1 + PM.writeByteArray dst (doff + 7) v0 + go (soff + 8) send dst (doff + 8) + else pure doff word128ArraySwap :: PrimArray Word128 -> Int -> Int -> Builder word128ArraySwap src soff0 slen0 = fromFunction (slen0 * 16) (go (soff0 * 16) ((soff0 + slen0) * 16)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - v8 = PM.indexPrimArray (asWord8s src) (soff + 8) - v9 = PM.indexPrimArray (asWord8s src) (soff + 9) - v10 = PM.indexPrimArray (asWord8s src) (soff + 10) - v11 = PM.indexPrimArray (asWord8s src) (soff + 11) - v12 = PM.indexPrimArray (asWord8s src) (soff + 12) - v13 = PM.indexPrimArray (asWord8s src) (soff + 13) - v14 = PM.indexPrimArray (asWord8s src) (soff + 14) - v15 = PM.indexPrimArray (asWord8s src) (soff + 15) - PM.writeByteArray dst doff v15 - PM.writeByteArray dst (doff + 1) v14 - PM.writeByteArray dst (doff + 2) v13 - PM.writeByteArray dst (doff + 3) v12 - PM.writeByteArray dst (doff + 4) v11 - PM.writeByteArray dst (doff + 5) v10 - PM.writeByteArray dst (doff + 6) v9 - PM.writeByteArray dst (doff + 7) v8 - PM.writeByteArray dst (doff + 8) v7 - PM.writeByteArray dst (doff + 9) v6 - PM.writeByteArray dst (doff + 10) v5 - PM.writeByteArray dst (doff + 11) v4 - PM.writeByteArray dst (doff + 12) v3 - PM.writeByteArray dst (doff + 13) v2 - PM.writeByteArray dst (doff + 14) v1 - PM.writeByteArray dst (doff + 15) v0 - go (soff + 16) send dst (doff + 16) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + v8 = PM.indexPrimArray (asWord8s src) (soff + 8) + v9 = PM.indexPrimArray (asWord8s src) (soff + 9) + v10 = PM.indexPrimArray (asWord8s src) (soff + 10) + v11 = PM.indexPrimArray (asWord8s src) (soff + 11) + v12 = PM.indexPrimArray (asWord8s src) (soff + 12) + v13 = PM.indexPrimArray (asWord8s src) (soff + 13) + v14 = PM.indexPrimArray (asWord8s src) (soff + 14) + v15 = PM.indexPrimArray (asWord8s src) (soff + 15) + PM.writeByteArray dst doff v15 + PM.writeByteArray dst (doff + 1) v14 + PM.writeByteArray dst (doff + 2) v13 + PM.writeByteArray dst (doff + 3) v12 + PM.writeByteArray dst (doff + 4) v11 + PM.writeByteArray dst (doff + 5) v10 + PM.writeByteArray dst (doff + 6) v9 + PM.writeByteArray dst (doff + 7) v8 + PM.writeByteArray dst (doff + 8) v7 + PM.writeByteArray dst (doff + 9) v6 + PM.writeByteArray dst (doff + 10) v5 + PM.writeByteArray dst (doff + 11) v4 + PM.writeByteArray dst (doff + 12) v3 + PM.writeByteArray dst (doff + 13) v2 + PM.writeByteArray dst (doff + 14) v1 + PM.writeByteArray dst (doff + 15) v0 + go (soff + 16) send dst (doff + 16) + else pure doff word256ArraySwap :: PrimArray Word256 -> Int -> Int -> Builder word256ArraySwap src soff0 slen0 = fromFunction (slen0 * 32) (go (soff0 * 32) ((soff0 + slen0) * 32)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let loop !i - | i < 32 = do - let v = PM.indexPrimArray (asWord8s src) (soff + i) - PM.writeByteArray dst (doff + (31 - i)) v - loop (i + 1) - | otherwise = pure () - loop 0 - go (soff + 32) send dst (doff + 32) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let loop !i + | i < 32 = do + let v = PM.indexPrimArray (asWord8s src) (soff + i) + PM.writeByteArray dst (doff + (31 - i)) v + loop (i + 1) + | otherwise = pure () + loop 0 + go (soff + 32) send dst (doff + 32) + else pure doff asWord8s :: PrimArray a -> PrimArray Word8 asWord8s (PrimArray x) = PrimArray x @@ -794,38 +896,43 @@ asWord8s (PrimArray x) = PrimArray x -- Internal function. Precondition, the referenced slice of the -- byte sequence is UTF-8 encoded text. slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder -{-# noinline slicedUtf8TextJson #-} -slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction# reqLen# - ( \dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# ) - where +{-# NOINLINE slicedUtf8TextJson #-} +slicedUtf8TextJson !src# !soff0# !slen0# = + fromFunction# + reqLen# + (\dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0#) + where -- We multiply by 6 because, in the worst case, everything might be in the -- unprintable ASCII range. The plus 2 is for the quotes on the ends. - !reqLen# = (6# *# slen0# ) +# 2# + !reqLen# = (6# *# slen0#) +# 2# --- | Constructor for 'Builder' that works on a function with lifted --- arguments instead of unlifted ones. This is just as unsafe as the --- actual constructor. +{- | Constructor for 'Builder' that works on a function with lifted +arguments instead of unlifted ones. This is just as unsafe as the +actual constructor. +-} fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder -{-# inline fromFunction #-} +{-# INLINE fromFunction #-} fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) -fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) ) -> Builder -{-# inline fromFunction# #-} +fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> Builder +{-# INLINE fromFunction# #-} fromFunction# req f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -835,192 +942,222 @@ shortTextUtf8 a = let ba = shortTextToByteArray a in bytes (Bytes ba 0 (PM.sizeofByteArray ba)) -#if MIN_VERSION_text(2,0,0) -- | Create a builder from text. The text will be UTF-8 encoded. textUtf8 :: Text -> Builder textUtf8 (I.Text (A.ByteArray b) off len) = bytes (Bytes (ByteArray b) off len) -#endif --- | Create a builder from text. The text will be UTF-8 encoded, --- and JSON special characters will be escaped. Additionally, the --- result is surrounded by double quotes. For example: --- --- * @foo ==\> "foo"@ (no escape sequences) --- * @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) --- * @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) +{- | Create a builder from text. The text will be UTF-8 encoded, +and JSON special characters will be escaped. Additionally, the +result is surrounded by double quotes. For example: + +* @foo ==\> "foo"@ (no escape sequences) +* @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) +* @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) +-} shortTextJsonString :: ShortText -> Builder -{-# inline shortTextJsonString #-} +{-# INLINE shortTextJsonString #-} shortTextJsonString a = let !(ByteArray ba) = shortTextToByteArray a !(I# len) = PM.sizeofByteArray (ByteArray ba) in slicedUtf8TextJson ba 0# len --- | Encodes an unsigned 64-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +textJsonString :: Text -> Builder +{-# INLINE textJsonString #-} +textJsonString (I.Text (A.ByteArray ba) (I# off) (I# len)) = slicedUtf8TextJson ba off len + +{- | Encodes an unsigned 64-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word64Dec :: Word64 -> Builder word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w) --- | Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word32Dec :: Word32 -> Builder word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w) --- | Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word16Dec :: Word16 -> Builder word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w) --- | Encodes an unsigned 8-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 8-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word8Dec :: Word8 -> Builder word8Dec w = fromBounded Nat.constant (Bounded.word8Dec w) --- | Encodes an unsigned machine-sized integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned machine-sized integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} wordDec :: Word -> Builder wordDec w = fromBounded Nat.constant (Bounded.wordDec w) --- | Encode a double-floating-point number, using decimal notation or --- scientific notation depending on the magnitude. This has undefined --- behavior when representing @+inf@, @-inf@, and @NaN@. It will not --- crash, but the generated numbers will be nonsense. +{- | Encode a double-floating-point number, using decimal notation or +scientific notation depending on the magnitude. This has undefined +behavior when representing @+inf@, @-inf@, and @NaN@. It will not +crash, but the generated numbers will be nonsense. +-} doubleDec :: Double -> Builder doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w) --- | Encodes a signed 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int64Dec :: Int64 -> Builder int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w) --- | Encodes a signed 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int32Dec :: Int32 -> Builder int32Dec w = fromBounded Nat.constant (Bounded.int32Dec w) --- | Encodes a signed 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int16Dec :: Int16 -> Builder int16Dec w = fromBounded Nat.constant (Bounded.int16Dec w) --- | Encodes a signed 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int8Dec :: Int8 -> Builder int8Dec w = fromBounded Nat.constant (Bounded.int8Dec w) --- | Encodes a signed machine-sized integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed machine-sized integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} intDec :: Int -> Builder intDec w = fromBounded Nat.constant (Bounded.intDec w) --- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 16 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @00000000000003FE@. +{- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding +the encoding to 16 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @00000000000003FE@. +-} word64PaddedUpperHex :: Word64 -> Builder word64PaddedUpperHex w = fromBounded Nat.constant (Bounded.word64PaddedUpperHex w) --- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 8 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @000003FE@. +{- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding +the encoding to 8 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @000003FE@. +-} word32PaddedUpperHex :: Word32 -> Builder word32PaddedUpperHex w = fromBounded Nat.constant (Bounded.word32PaddedUpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 4 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @03FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +the encoding to 4 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @03FE@. +-} word16PaddedUpperHex :: Word16 -> Builder word16PaddedUpperHex w = fromBounded Nat.constant (Bounded.word16PaddedUpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 4 digits. This uses lowercase for the alphabetical --- digits. For example, this encodes the number 1022 as @03fe@. +{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +the encoding to 4 digits. This uses lowercase for the alphabetical +digits. For example, this encodes the number 1022 as @03fe@. +-} word16PaddedLowerHex :: Word16 -> Builder word16PaddedLowerHex w = fromBounded Nat.constant (Bounded.word16PaddedLowerHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses lowercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3fe@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses lowercase for the alphabetical digits. For +example, this encodes the number 1022 as @3fe@. +-} word16LowerHex :: Word16 -> Builder word16LowerHex w = fromBounded Nat.constant (Bounded.word16LowerHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses uppercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses uppercase for the alphabetical digits. For +example, this encodes the number 1022 as @3FE@. +-} word16UpperHex :: Word16 -> Builder word16UpperHex w = fromBounded Nat.constant (Bounded.word16UpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses lowercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses lowercase for the alphabetical digits. For +example, this encodes the number 1022 as @3FE@. +-} word8LowerHex :: Word8 -> Builder word8LowerHex w = fromBounded Nat.constant (Bounded.word8LowerHex w) --- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 2 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 11 as @0B@. +{- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding +the encoding to 2 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 11 as @0B@. +-} word8PaddedUpperHex :: Word8 -> Builder word8PaddedUpperHex w = fromBounded Nat.constant (Bounded.word8PaddedUpperHex w) --- | Encode an ASCII char. --- Precondition: Input must be an ASCII character. This is not checked. +{- | Encode an ASCII char. +Precondition: Input must be an ASCII character. This is not checked. +-} ascii :: Char -> Builder ascii c = fromBoundedOne (Bounded.ascii c) --- | Encode two ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode two ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii2 :: Char -> Char -> Builder ascii2 a b = fromBounded Nat.constant (Bounded.ascii2 a b) --- | Encode three ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode three ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii3 :: Char -> Char -> Char -> Builder ascii3 a b c = fromBounded Nat.constant (Bounded.ascii3 a b c) --- | Encode four ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode four ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii4 :: Char -> Char -> Char -> Char -> Builder ascii4 a b c d = fromBounded Nat.constant (Bounded.ascii4 a b c d) --- | Encode five ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode five ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder ascii5 a b c d e = fromBounded Nat.constant (Bounded.ascii5 a b c d e) --- | Encode six ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode six ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii6 a b c d e f = fromBounded Nat.constant (Bounded.ascii6 a b c d e f) --- | Encode seven ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode seven ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii7 a b c d e f g = fromBounded Nat.constant (Bounded.ascii7 a b c d e f g) --- | Encode eight ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode eight ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii8 a b c d e f g h = fromBounded Nat.constant (Bounded.ascii8 a b c d e f g h) @@ -1031,83 +1168,99 @@ char c = fromBounded Nat.constant (Bounded.char c) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +signed integer in a little-endian fashion. +-} int64LE :: Int64 -> Builder int64LE w = fromBounded Nat.constant (Bounded.int64LE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +signed integer in a little-endian fashion. +-} int32LE :: Int32 -> Builder int32LE w = fromBounded Nat.constant (Bounded.int32LE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +signed integer in a little-endian fashion. +-} int16LE :: Int16 -> Builder int16LE w = fromBounded Nat.constant (Bounded.int16LE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +signed integer in a big-endian fashion. +-} int64BE :: Int64 -> Builder int64BE w = fromBounded Nat.constant (Bounded.int64BE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +signed integer in a big-endian fashion. +-} int32BE :: Int32 -> Builder int32BE w = fromBounded Nat.constant (Bounded.int32BE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +signed integer in a big-endian fashion. +-} int16BE :: Int16 -> Builder int16BE w = fromBounded Nat.constant (Bounded.int16BE w) --- | Requires exactly 32 bytes. Dump the octets of a 256-bit --- word in a little-endian fashion. +{- | Requires exactly 32 bytes. Dump the octets of a 256-bit +word in a little-endian fashion. +-} word256LE :: Word256 -> Builder word256LE w = fromBounded Nat.constant (Bounded.word256LE w) --- | Requires exactly 16 bytes. Dump the octets of a 128-bit --- word in a little-endian fashion. +{- | Requires exactly 16 bytes. Dump the octets of a 128-bit +word in a little-endian fashion. +-} word128LE :: Word128 -> Builder word128LE w = fromBounded Nat.constant (Bounded.word128LE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a little-endian fashion. +-} word64LE :: Word64 -> Builder word64LE w = fromBounded Nat.constant (Bounded.word64LE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a little-endian fashion. +-} word32LE :: Word32 -> Builder word32LE w = fromBounded Nat.constant (Bounded.word32LE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a little-endian fashion. +-} word16LE :: Word16 -> Builder word16LE w = fromBounded Nat.constant (Bounded.word16LE w) --- | Requires exactly 32 bytes. Dump the octets of a 256-bit --- word in a big-endian fashion. +{- | Requires exactly 32 bytes. Dump the octets of a 256-bit +word in a big-endian fashion. +-} word256BE :: Word256 -> Builder word256BE w = fromBounded Nat.constant (Bounded.word256BE w) --- | Requires exactly 16 bytes. Dump the octets of a 128-bit --- word in a big-endian fashion. +{- | Requires exactly 16 bytes. Dump the octets of a 128-bit +word in a big-endian fashion. +-} word128BE :: Word128 -> Builder word128BE w = fromBounded Nat.constant (Bounded.word128BE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a big-endian fashion. +-} word64BE :: Word64 -> Builder word64BE w = fromBounded Nat.constant (Bounded.word64BE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a big-endian fashion. +-} word32BE :: Word32 -> Builder word32BE w = fromBounded Nat.constant (Bounded.word32BE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a big-endian fashion. +-} word16BE :: Word16 -> Builder word16BE w = fromBounded Nat.constant (Bounded.word16BE w) @@ -1117,11 +1270,14 @@ word8 w = fromBoundedOne (Bounded.word8 w) -- | Prefix a builder with the number of bytes that it requires. consLength :: - Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length - -> (Int -> Bounded.Builder n) -- ^ Length serialization function - -> Builder -- ^ Builder whose length is measured - -> Builder -{-# inline consLength #-} + -- | Number of bytes used by the serialization of the length + Arithmetic.Nat n -> + -- | Length serialization function + (Int -> Bounded.Builder n) -> + -- | Builder whose length is measured + Builder -> + Builder +{-# INLINE consLength #-} consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- There is actually a little bit of unsoundness here. If the number of -- bytes required to encode the length is greater than 4080, this will @@ -1135,170 +1291,200 @@ consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> in case f buf1 (off1 +# lenSz) (len1 -# lenSz) cs1 s1 of (# s2, buf2, off2, len2, cs2 #) -> let !dist = commitDistance1 buf1 (off1 +# lenSz) buf2 off2 cs2 - ST g = UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf1) - (I# off1) + ST g = + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf1) + (I# off1) in case g s2 of (# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #) --- | Variant of 'consLength32BE' the encodes the length in --- a little-endian fashion. +{- | Variant of 'consLength32BE' the encodes the length in +a little-endian fashion. +-} consLength32LE :: Builder -> Builder consLength32LE = consLength Nat.constant (\x -> Bounded.word32LE (fromIntegral x)) --- | Prefix a builder with its size in bytes. This size is --- presented as a big-endian 32-bit word. The need to prefix --- a builder with its length shows up a numbers of wire protocols --- including those of PostgreSQL and Apache Kafka. Note the --- equivalence: --- --- > forall (n :: Int) (x :: Builder). --- > let sz = sizeofByteArray (run n (consLength32BE x)) --- > consLength32BE x === word32BE (fromIntegral sz) <> x --- --- However, using 'consLength32BE' is much more efficient here --- since it only materializes the 'ByteArray' once. +{- | Prefix a builder with its size in bytes. This size is +presented as a big-endian 32-bit word. The need to prefix +a builder with its length shows up a numbers of wire protocols +including those of PostgreSQL and Apache Kafka. Note the +equivalence: + +> forall (n :: Int) (x :: Builder). +> let sz = sizeofByteArray (run n (consLength32BE x)) +> consLength32BE x === word32BE (fromIntegral sz) <> x + +However, using 'consLength32BE' is much more efficient here +since it only materializes the 'ByteArray' once. +-} consLength32BE :: Builder -> Builder consLength32BE = consLength Nat.constant (\x -> Bounded.word32BE (fromIntegral x)) --- | Prefix a builder with its size in bytes. This size is --- presented as a big-endian 64-bit word. See 'consLength32BE'. +{- | Prefix a builder with its size in bytes. This size is +presented as a big-endian 64-bit word. See 'consLength32BE'. +-} consLength64BE :: Builder -> Builder consLength64BE = consLength Nat.constant (\x -> Bounded.word64BE (fromIntegral x)) --- | Push the buffer currently being filled onto the chunk list, --- allocating a new active buffer of the requested size. This is --- helpful when a small builder is sandwhiched between two large --- zero-copy builders: --- --- > insert bigA <> flush 1 <> word8 0x42 <> insert bigB --- --- Without @flush 1@, @word8 0x42@ would see the zero-byte active --- buffer that 'insert' returned, decide that it needed more space, --- and allocate a 4080-byte buffer to which only a single byte --- would be written. +{- | Push the buffer currently being filled onto the chunk list, +allocating a new active buffer of the requested size. This is +helpful when a small builder is sandwhiched between two large +zero-copy builders: + +> insert bigA <> flush 1 <> word8 0x42 <> insert bigB + +Without @flush 1@, @word8 0x42@ would see the zero-byte active +buffer that 'insert' returned, decide that it needed more space, +and allocate a 4080-byte buffer to which only a single byte +would be written. +-} flush :: Int -> Builder flush !reqSz = Builder $ \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# sz# s0 of (# sX, bufX #) -> (# sX, bufX, 0#, sz#, Mutable buf0 off0 cs0 #) - where - !(I# sz# ) = max reqSz 0 + where + !(I# sz#) = max reqSz 0 -- ShortText is already UTF-8 encoded. This is a no-op. shortTextToByteArray :: ShortText -> ByteArray shortTextToByteArray x = case TS.toShortByteString x of SBS a -> ByteArray a --- | Encode a signed machine-sized integer with LEB-128. This uses --- zig-zag encoding. +{- | Encode a signed machine-sized integer with LEB-128. This uses +zig-zag encoding. +-} intLEB128 :: Int -> Builder -{-# inline intLEB128 #-} +{-# INLINE intLEB128 #-} intLEB128 = wordLEB128 . toZigzagNative -- | Encode a 32-bit signed integer with LEB-128. This uses zig-zag encoding. int32LEB128 :: Int32 -> Builder -{-# inline int32LEB128 #-} +{-# INLINE int32LEB128 #-} int32LEB128 = word32LEB128 . toZigzag32 -- | Encode a 64-bit signed integer with LEB-128. This uses zig-zag encoding. int64LEB128 :: Int64 -> Builder -{-# inline int64LEB128 #-} +{-# INLINE int64LEB128 #-} int64LEB128 = word64LEB128 . toZigzag64 -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder -{-# inline wordLEB128 #-} +{-# INLINE wordLEB128 #-} wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w) +-- | Encode a 16-bit word with LEB-128. +word16LEB128 :: Word16 -> Builder +{-# INLINE word16LEB128 #-} +word16LEB128 w = fromBounded Nat.constant (Bounded.word16LEB128 w) + -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder -{-# inline word32LEB128 #-} +{-# INLINE word32LEB128 #-} word32LEB128 w = fromBounded Nat.constant (Bounded.word32LEB128 w) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder -{-# inline word64LEB128 #-} +{-# INLINE word64LEB128 #-} word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w) -- | Encode a machine-sized word with VLQ. wordVlq :: Word -> Builder -{-# inline wordVlq #-} +{-# INLINE wordVlq #-} wordVlq w = fromBounded Nat.constant (Bounded.wordVlq w) -- | Encode a 32-bit word with VLQ. word32Vlq :: Word32 -> Builder -{-# inline word32Vlq #-} +{-# INLINE word32Vlq #-} word32Vlq w = fromBounded Nat.constant (Bounded.word32Vlq w) -- | Encode a 64-bit word with VLQ. word64Vlq :: Word64 -> Builder -{-# inline word64Vlq #-} +{-# INLINE word64Vlq #-} word64Vlq w = fromBounded Nat.constant (Bounded.word64Vlq w) --- | Encode a signed arbitrary-precision integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encode a signed arbitrary-precision integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} integerDec :: Integer -> Builder integerDec !i | i < 0 = ascii '-' <> naturalDec (naturalFromInteger (negate i)) | otherwise = naturalDec (naturalFromInteger i) --- | Encodes an unsigned arbitrary-precision integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Encodes an unsigned arbitrary-precision integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} naturalDec :: Natural -> Builder -naturalDec !n0 = fromEffect - (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) - (\marr off -> case n0 of - 0 -> do - PM.writeByteArray marr off (0x30 :: Word8) - pure (off + 1) - _ -> go n0 marr off off - ) - where +naturalDec !n0 = + fromEffect + (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) + ( \marr off -> case n0 of + 0 -> do + PM.writeByteArray marr off (0x30 :: Word8) + pure (off + 1) + _ -> go n0 marr off off + ) + where go :: forall s. Natural -> MutableByteArray s -> Int -> Int -> ST s Int go !n !buf !off0 !off = case quotRem n 1_000_000_000 of - (q,r) -> case q of + (q, r) -> case q of 0 -> do off' <- backwardsWordLoop buf off (fromIntegral @Natural @Word r) reverseBytes buf off0 (off' - 1) pure off' _ -> do - off' <- backwardsPasteWordPaddedDec9 - (fromIntegral @Natural @Word r) buf off + off' <- + backwardsPasteWordPaddedDec9 + (fromIntegral @Natural @Word r) + buf + off go q buf off0 off' -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# inline reverseBytes #-} -reverseBytes arr begin end = go begin end where - go ixA ixB = if ixA < ixB - then do - a :: Word8 <- PM.readByteArray arr ixA - b :: Word8 <- PM.readByteArray arr ixB - PM.writeByteArray arr ixA b - PM.writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# INLINE reverseBytes #-} +reverseBytes arr begin end = go begin end + where + go ixA ixB = + if ixA < ixB + then do + a :: Word8 <- PM.readByteArray arr ixA + b :: Word8 <- PM.readByteArray arr ixB + PM.writeByteArray arr ixA b + PM.writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () backwardsPasteWordPaddedDec9 :: Word -> MutableByteArray s -> Int -> ST s Int backwardsPasteWordPaddedDec9 !w !arr !off = do backwardsPutRem10 - (backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ - backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ - backwardsPutRem10 $ backwardsPutRem10 - (\_ _ _ -> pure ()) - ) arr off w + ( backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 + (\_ _ _ -> pure ()) + ) + arr + off + w pure (off + 9) backwardsPutRem10 :: - (MutableByteArray s -> Int -> Word -> ST s a) - -> MutableByteArray s -> Int -> Word -> ST s a -{-# inline backwardsPutRem10 #-} + (MutableByteArray s -> Int -> Word -> ST s a) -> + MutableByteArray s -> + Int -> + Word -> + ST s a +{-# INLINE backwardsPutRem10 #-} backwardsPutRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) @@ -1306,25 +1492,31 @@ backwardsPutRem10 andThen arr off dividend = do andThen arr (off + 1) quotient backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 where - go !off !(x :: Word) = if x > 0 - then do - let (y,z) = quotRem x 10 - PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# INLINE backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 + where + go !off !(x :: Word) = + if x > 0 + then do + let (y, z) = quotRem x 10 + PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- | Replicate a byte the given number of times. replicate :: - Int -- ^ Number of times to replicate the byte - -> Word8 -- ^ Byte to replicate - -> Builder -replicate !len !w = fromEffect len - (\marr off -> do - PM.setByteArray marr off len w - pure (off + len) - ) + -- | Number of times to replicate the byte + Int -> + -- | Byte to replicate + Word8 -> + Builder +replicate !len !w = + fromEffect + len + ( \marr off -> do + PM.setByteArray marr off len w + pure (off + len) + ) -- Based on C code from https://stackoverflow.com/a/5558614 -- For numbers less than 1073741829, this gives a correct answer. @@ -1346,21 +1538,22 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) --- | This function and the documentation for it are copied from --- Takano Akio's fast-builder library. --- --- @'rebuild' b@ is equivalent to @b@, but it allows GHC to assume --- that @b@ will be run at most once. This can enable various --- optimizations that greately improve performance. --- --- There are two types of typical situations where a use of 'rebuild' --- is often a win: --- --- * When constructing a builder using a recursive function. e.g. --- @rebuild $ foldr ...@. --- * When constructing a builder using a conditional expression. e.g. --- @rebuild $ case x of ... @ +{- | This function and the documentation for it are copied from +Takano Akio's fast-builder library. + +@'rebuild' b@ is equivalent to @b@, but it allows GHC to assume +that @b@ will be run at most once. This can enable various +optimizations that greately improve performance. + +There are two types of typical situations where a use of 'rebuild' +is often a win: + +* When constructing a builder using a recursive function. e.g. + @rebuild $ foldr ...@. +* When constructing a builder using a conditional expression. e.g. + @rebuild $ case x of ... @ +-} rebuild :: Builder -> Builder -{-# inline rebuild #-} +{-# INLINE rebuild #-} rebuild (Builder f) = Builder $ oneShot $ \a -> oneShot $ \b -> oneShot $ \c -> oneShot $ \d -> oneShot $ \e -> f a b c d e diff --git a/src/Data/Bytes/Builder/Avro.hs b/src/Data/Bytes/Builder/Avro.hs new file mode 100644 index 0000000..f86f350 --- /dev/null +++ b/src/Data/Bytes/Builder/Avro.hs @@ -0,0 +1,87 @@ +{-# LANGUAGE BangPatterns #-} + +{- | Builders for encoding data with Apache Avro. Most functions in this +module are just aliases for other functions. Avro uses zig-zag LEB128 +for all integral types. +-} +module Data.Bytes.Builder.Avro + ( int + , int32 + , int64 + , word16 + , word32 + , word128 + , bytes + , chunks + , text + + -- * Maps + , map2 + ) where + +import Data.Bytes (Bytes) +import Data.Bytes.Builder (Builder) +import Data.Bytes.Chunks (Chunks) +import Data.Int +import Data.Text (Text) +import Data.WideWord (Word128) +import Data.Word + +import qualified Data.Bytes as Bytes +import qualified Data.Bytes.Builder as B +import qualified Data.Bytes.Chunks as Chunks +import qualified Data.Bytes.Text.Utf8 as Utf8 + +int32 :: Int32 -> Builder +int32 = B.int32LEB128 + +int64 :: Int64 -> Builder +int64 = B.int64LEB128 + +int :: Int -> Builder +int = B.intLEB128 + +{- | Note: This results in a zigzag encoded number. Avro does not have +unsigned types. +-} +word16 :: Word16 -> Builder +word16 = B.int32LEB128 . fromIntegral + +{- | Note: This results in a zigzag encoded number. Avro does not have +unsigned types. +-} +word32 :: Word32 -> Builder +word32 = B.int64LEB128 . fromIntegral + +{- | Note: This results in a @fixed@ encoded value of length 16. In the +schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@. +A big-endian encoding is used. +-} +word128 :: Word128 -> Builder +word128 = B.word128BE + +bytes :: Bytes -> Builder +bytes !b = int (Bytes.length b) <> B.bytes b + +chunks :: Chunks -> Builder +chunks !b = int (Chunks.length b) <> B.chunks b + +text :: Text -> Builder +text = bytes . Utf8.fromText + +{- | Encode a map with exactly two key-value pairs. The keys are text. +This is commonly used to encode the header in an avro file, which has +a map with two keys: @avro.schema@ and @avro.codec@. +-} +map2 :: + -- | First key + Text -> + -- | First value (already encoded) + Builder -> + -- | Second key + Text -> + -- | Second value (already encoded) + Builder -> + Builder +{-# INLINE map2 #-} +map2 k1 v1 k2 v2 = B.word8 0x04 <> text k1 <> v1 <> text k2 <> v2 <> B.word8 0x00 diff --git a/src/Data/Bytes/Builder/Bounded.hs b/src/Data/Bytes/Builder/Bounded.hs index c835d95..5703ef3 100644 --- a/src/Data/Bytes/Builder/Bounded.hs +++ b/src/Data/Bytes/Builder/Bounded.hs @@ -1,34 +1,38 @@ -{-# language CPP #-} -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language NumericUnderscores #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language TypeOperators #-} -{-# language UnboxedTuples #-} -{-# language UnliftedFFITypes #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} --- | The functions in this module are explict about the maximum number --- of bytes they require. +{- | The functions in this module are explict about the maximum number +of bytes they require. +-} module Data.Bytes.Builder.Bounded ( -- * Builder Builder + -- * Execute , run , runByteString , pasteGrowST + -- * Combine , empty , append + -- * Bounds Manipulation , weaken , substitute + -- * Encode Integral Types + -- ** Human-Readable , word64Dec , word32Dec @@ -40,25 +44,32 @@ module Data.Bytes.Builder.Bounded , int16Dec , int8Dec , intDec + -- * Unsigned Words + -- ** Wide Words , word128PaddedLowerHex , word128PaddedUpperHex , word256PaddedLowerHex , word256PaddedUpperHex + -- ** 64-bit , word64PaddedLowerHex , word64PaddedUpperHex + -- ** 48-bit , word48PaddedLowerHex + -- ** 32-bit , word32PaddedLowerHex , word32PaddedUpperHex + -- ** 16-bit , word16PaddedLowerHex , word16PaddedUpperHex , word16LowerHex , word16UpperHex + -- ** 8-bit , word8PaddedLowerHex , word8PaddedUpperHex @@ -72,13 +83,18 @@ module Data.Bytes.Builder.Bounded , ascii7 , ascii8 , char + -- ** Native , wordPaddedDec2 + , wordPaddedDec3 , wordPaddedDec4 , wordPaddedDec9 + -- ** Machine-Readable + -- *** One , word8 + -- **** Big Endian , word256BE , word128BE @@ -88,6 +104,7 @@ module Data.Bytes.Builder.Bounded , int64BE , int32BE , int16BE + -- **** Little Endian , word256LE , word128LE @@ -97,43 +114,48 @@ module Data.Bytes.Builder.Bounded , int64LE , int32LE , int16LE + -- **** LEB128 + -- | LEB128 encodes an integer in 7-bit units, least significant bits first, -- with the high bit of each output byte set to 1 in all bytes except for -- the final byte. , wordLEB128 + , word16LEB128 , word32LEB128 , word64LEB128 + -- **** VLQ + -- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit -- units, most significant bits first, with the high bit of each output byte -- set to 1 in all bytes except for the final byte. , wordVlq , word32Vlq , word64Vlq + -- * Encode Floating-Point Types , doubleDec ) where -import Arithmetic.Types (type (<=), type (:=:)) +import Arithmetic.Types (type (:=:), type (<=)) import Control.Monad.Primitive (primitive_) import Control.Monad.ST (ST) -import Control.Monad.ST.Run (runByteArrayST,runIntByteArrayST) +import Control.Monad.ST.Run (runByteArrayST, runIntByteArrayST) import Data.Bits -import Data.Bytes.Builder.Bounded.Unsafe (Builder(..)) import Data.ByteString (ByteString) +import Data.Bytes.Builder.Bounded.Unsafe (Builder (..)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (MutableByteArray(..),ByteArray,writeByteArray) -import Data.Primitive (readByteArray,newByteArray,unsafeFreezeByteArray) -import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) -import Data.WideWord (Word128(Word128),Word256(Word256)) +import Data.Primitive (ByteArray, MutableByteArray (..), newByteArray, readByteArray, unsafeFreezeByteArray, writeByteArray) +import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset (..)) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) import GHC.Exts -import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#)) import GHC.IO (unsafeIOToST) -import GHC.ST (ST(ST)) +import GHC.Int (Int16 (I16#), Int32 (I32#), Int64 (I64#), Int8 (I8#)) +import GHC.ST (ST (ST)) import GHC.TypeLits (type (+)) -import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#)) -import Data.Bytes.Types (Bytes(Bytes)) +import GHC.Word (Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#)) import qualified Compat as C @@ -144,48 +166,53 @@ import qualified Data.Bytes as Bytes import qualified Data.Bytes.Builder.Bounded.Unsafe as Unsafe import qualified Data.Primitive as PM --- | Execute the bounded builder. If the size is a constant, --- use @Arithmetic.Nat.constant@ as the first argument to let --- GHC conjure up this value for you. +{- | Execute the bounded builder. If the size is a constant, +use @Arithmetic.Nat.constant@ as the first argument to let +GHC conjure up this value for you. +-} run :: - Arithmetic.Nat n - -> Builder n -- ^ Builder - -> ByteArray -{-# inline run #-} + Arithmetic.Nat n -> + -- | Builder + Builder n -> + ByteArray +{-# INLINE run #-} run n b = runByteArrayST $ do arr <- newByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len unsafeFreezeByteArray arr --- | Variant of 'run' that puts the result in a pinned buffer and --- packs it up in a 'ByteString'. +{- | Variant of 'run' that puts the result in a pinned buffer and +packs it up in a 'ByteString'. +-} runByteString :: - Arithmetic.Nat n - -> Builder n -- ^ Builder - -> ByteString -{-# inline runByteString #-} + Arithmetic.Nat n -> + -- | Builder + Builder n -> + ByteString +{-# INLINE runByteString #-} runByteString n b = - let (finalLen,r) = runIntByteArrayST $ do + let (finalLen, r) = runIntByteArrayST $ do arr <- PM.newPinnedByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len arr' <- unsafeFreezeByteArray arr - pure (len,arr') + pure (len, arr') in Bytes.pinnedToByteString (Bytes r 0 finalLen) --- | Paste the builder into the byte array starting at offset zero. --- This reallocates the byte array if it cannot accomodate the builder, --- growing it by the minimum amount necessary. +{- | Paste the builder into the byte array starting at offset zero. +This reallocates the byte array if it cannot accomodate the builder, +growing it by the minimum amount necessary. +-} pasteGrowST :: - Arithmetic.Nat n - -> Builder n - -> MutableByteArrayOffset s - -- ^ Initial buffer, used linearly. Do not reuse this argument. - -> ST s (MutableByteArrayOffset s) - -- ^ Final buffer that accomodated the builder. -{-# inline pasteGrowST #-} -pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do + Arithmetic.Nat n -> + Builder n -> + -- | Initial buffer, used linearly. Do not reuse this argument. + MutableByteArrayOffset s -> + -- | Final buffer that accomodated the builder. + ST s (MutableByteArrayOffset s) +{-# INLINE pasteGrowST #-} +pasteGrowST n b !(MutableByteArrayOffset {array = arr0, offset = off0}) = do sz0 <- PM.getSizeofMutableByteArray arr0 let req = Nat.demote n let sz1 = off0 + req @@ -213,15 +240,16 @@ unsafeAppend (Builder f) (Builder g) = Builder $ \arr off0 s0 -> case f arr off0 s0 of (# s1, r #) -> g arr r s1 --- | Weaken the bound on the maximum number of bytes required. For example, --- to use two builders with unequal bounds in a disjunctive setting: --- --- > import qualified Arithmetic.Lte as Lte --- > --- > buildNumber :: Either Double Word64 -> Builder 32 --- > buildNumber = \case --- > Left d -> doubleDec d --- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w) +{- | Weaken the bound on the maximum number of bytes required. For example, +to use two builders with unequal bounds in a disjunctive setting: + +> import qualified Arithmetic.Lte as Lte +> +> buildNumber :: Either Double Word64 -> Builder 32 +> buildNumber = \case +> Left d -> doubleDec d +> Right w -> weaken (Lte.constant @19 @32) (word64Dec w) +-} weaken :: forall m n. (m <= n) -> Builder m -> Builder n weaken !_ (Builder f) = Builder f @@ -229,35 +257,35 @@ weaken !_ (Builder f) = Builder f substitute :: forall m n. (m :=: n) -> Builder m -> Builder n substitute !_ (Builder f) = Builder f --- | Encode a double-floating-point number, using decimal notation or --- scientific notation depending on the magnitude. This has undefined --- behavior when representing @+inf@, @-inf@, and @NaN@. It will not --- crash, but the generated numbers will be nonsense. +{- | Encode a double-floating-point number, using decimal notation or +scientific notation depending on the magnitude. This has undefined +behavior when representing @+inf@, @-inf@, and @NaN@. It will not +crash, but the generated numbers will be nonsense. +-} doubleDec :: Double -> Builder 32 doubleDec (D# d) = Builder (\arr off0 s0 -> doubleDec# d arr off0 s0) --- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word64Dec :: Word64 -> Builder 19 -word64Dec (W64# w) = wordCommonDec# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif +word64Dec (W64# w) = wordCommonDec# (word64ToWord# w) --- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word32Dec :: Word32 -> Builder 10 word32Dec (W32# w) = wordCommonDec# (C.word32ToWord# w) --- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word16Dec :: Word16 -> Builder 5 word16Dec (W16# w) = wordCommonDec# (C.word16ToWord# w) --- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word8Dec :: Word8 -> Builder 3 word8Dec (W8# w) = -- We unroll the loop when encoding Word8s. This speeds things @@ -268,106 +296,105 @@ word8Dec (W8# w) = -- L1 cache in real applications. word8Dec# (C.word8ToWord# w) --- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer --- as decimal. This encoding never starts with a zero unless the argument --- was zero. +{- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer +as decimal. This encoding never starts with a zero unless the argument +was zero. +-} wordDec :: Word -> Builder 19 wordDec (W# w) = wordCommonDec# w --- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int64Dec :: Int64 -> Builder 20 -int64Dec (I64# w) = intCommonDec# -#if MIN_VERSION_base(4,17,0) - (int64ToInt# w) -#else - w -#endif +int64Dec (I64# w) = intCommonDec# (int64ToInt# w) --- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int32Dec :: Int32 -> Builder 11 int32Dec (I32# w) = intCommonDec# (C.int32ToInt# w) --- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int16Dec :: Int16 -> Builder 6 int16Dec (I16# w) = intCommonDec# (C.int16ToInt# w) --- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int8Dec :: Int8 -> Builder 4 int8Dec (I8# w) = intCommonDec# (C.int8ToInt# w) --- | Requires up to 20 bytes. Encodes a signed machine-sized integer --- as decimal. This encoding never starts with a zero unless the --- argument was zero. Negative numbers are preceded by a minus sign. --- Positive numbers are not preceded by anything. +{- | Requires up to 20 bytes. Encodes a signed machine-sized integer +as decimal. This encoding never starts with a zero unless the +argument was zero. Negative numbers are preceded by a minus sign. +Positive numbers are not preceded by anything. +-} intDec :: Int -> Builder 20 intDec (I# w) = intCommonDec# w word8Dec# :: Word# -> Builder 3 -{-# noinline word8Dec# #-} +{-# NOINLINE word8Dec# #-} word8Dec# w# = Unsafe.construct $ \arr off0 -> do - let !(I# off0# ) = off0 - !(!x,!ones) = quotRem w 10 - !(hundreds@(W# hundreds# ),tens@(W# tens# )) = quotRem x 10 + let !(I# off0#) = off0 + !(!x, !ones) = quotRem w 10 + !(hundreds@(W# hundreds#), tens@(W# tens#)) = quotRem x 10 writeByteArray arr off0 (fromIntegral (hundreds + 0x30) :: Word8) let !hasHundreds = gtWord# hundreds# 0## - !off1@(I# off1# ) = I# (off0# +# hasHundreds) + !off1@(I# off1#) = I# (off0# +# hasHundreds) writeByteArray arr off1 (fromIntegral (tens + 0x30) :: Word8) - let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0## ))) + let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0##))) writeByteArray arr off2 (fromIntegral (ones + 0x30) :: Word8) pure (off2 + 1) - where + where w = W# w# -- Requires a number of bytes that is bounded by the size of -- the word. This is only used internally. wordCommonDec# :: Word# -> Builder n -{-# noinline wordCommonDec# #-} -wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0 - then internalWordLoop arr off0 (W# w#) - else do - writeByteArray arr off0 (c2w '0') - pure (off0 + 1) - where - w = W64# -#if MIN_VERSION_base(4,17,0) - (wordToWord64# w#) -#else - w# -#endif +{-# NOINLINE wordCommonDec# #-} +wordCommonDec# w# = Unsafe.construct $ \arr off0 -> + if w /= 0 + then internalWordLoop arr off0 (W# w#) + else do + writeByteArray arr off0 (c2w '0') + pure (off0 + 1) + where + w = W64# (wordToWord64# w#) internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline internalWordLoop #-} +{-# INLINE internalWordLoop #-} internalWordLoop arr off0 x0 = do off1 <- backwardsWordLoop arr off0 x0 reverseBytes arr off0 (off1 - 1) pure off1 backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 where - go !off !(x :: Word) = if x > 0 - then do - let (y,z) = quotRem x 10 - writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# INLINE backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 + where + go !off !(x :: Word) = + if x > 0 + then do + let (y, z) = quotRem x 10 + writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- Requires up to 20 bytes. Can be less depending on what the -- size of the argument is known to be. Unsafe. intCommonDec# :: Int# -> Builder n -{-# noinline intCommonDec# #-} +{-# NOINLINE intCommonDec# #-} intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of GT -> internalWordLoop arr off0 (fromIntegral w) EQ -> do @@ -376,13 +403,8 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of LT -> do writeByteArray arr off0 (c2w '-') internalWordLoop arr (off0 + 1) (fromIntegral (negate w)) - where - w = I64# -#if MIN_VERSION_base(4,17,0) - (intToInt64# w#) -#else - w# -#endif + where + w = I64# (intToInt64# w#) -- Convert a number between 0 and 16 to the ASCII -- representation of its hexadecimal character. @@ -390,10 +412,11 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of -- unneeded bitmask. This actually needs a Word64 -- argument. toHexUpper :: Word -> Word8 -toHexUpper w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 @@ -401,162 +424,161 @@ toHexUpper w' = fromIntegral hiSolved = w + 55 toHexLower :: Word -> Word8 -toHexLower w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexLower w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 loSolved = w + 48 hiSolved = w + 87 --- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 64 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +hexadecimal, zero-padding the encoding to 64 digits. This uses +lowercase for the alphabetical digits. +-} word256PaddedLowerHex :: Word256 -> Builder 64 word256PaddedLowerHex (Word256 w192 w128 w64 w0) = - word64PaddedLowerHex w192 - `append` word64PaddedLowerHex w128 - `append` word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 + word64PaddedLowerHex w192 + `append` word64PaddedLowerHex w128 + `append` word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 --- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 64 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +hexadecimal, zero-padding the encoding to 64 digits. This uses +uppercase for the alphabetical digits. +-} word256PaddedUpperHex :: Word256 -> Builder 64 word256PaddedUpperHex (Word256 w192 w128 w64 w0) = - word64PaddedUpperHex w192 - `append` word64PaddedUpperHex w128 - `append` word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 + word64PaddedUpperHex w192 + `append` word64PaddedUpperHex w128 + `append` word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 - --- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 32 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +hexadecimal, zero-padding the encoding to 32 digits. This uses +lowercase for the alphabetical digits. +-} word128PaddedLowerHex :: Word128 -> Builder 32 word128PaddedLowerHex (Word128 w64 w0) = - word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 + word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 --- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 32 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +hexadecimal, zero-padding the encoding to 32 digits. This uses +uppercase for the alphabetical digits. +-} word128PaddedUpperHex :: Word128 -> Builder 32 word128PaddedUpperHex (Word128 w64 w0) = - word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 + word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 - --- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 16 digits. This uses --- uppercase for the alphabetical digits. For example, this encodes the --- number 1022 as @00000000000003FE@. +{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +hexadecimal, zero-padding the encoding to 16 digits. This uses +uppercase for the alphabetical digits. For example, this encodes the +number 1022 as @00000000000003FE@. +-} word64PaddedUpperHex :: Word64 -> Builder 16 -word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif +word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# (word64ToWord# w) - --- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 16 digits. This uses --- lowercase for the alphabetical digits. For example, this encodes the --- number 1022 as @00000000000003fe@. +{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +hexadecimal, zero-padding the encoding to 16 digits. This uses +lowercase for the alphabetical digits. For example, this encodes the +number 1022 as @00000000000003fe@. +-} word64PaddedLowerHex :: Word64 -> Builder 16 -word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif +word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# (word64ToWord# w) --- | Requires exactly 12 bytes. Discards the upper 16 bits of a --- 64-bit unsigned integer and then encodes the lower 48 bits as --- hexadecimal, zero-padding the encoding to 12 digits. This uses --- lowercase for the alphabetical digits. For example, this encodes the --- number 1022 as @0000000003fe@. +{- | Requires exactly 12 bytes. Discards the upper 16 bits of a +64-bit unsigned integer and then encodes the lower 48 bits as +hexadecimal, zero-padding the encoding to 12 digits. This uses +lowercase for the alphabetical digits. For example, this encodes the +number 1022 as @0000000003fe@. +-} word48PaddedLowerHex :: Word64 -> Builder 12 -word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif +word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# (word64ToWord# w) --- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 8 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +hexadecimal, zero-padding the encoding to 8 digits. This uses +uppercase for the alphabetical digits. +-} word32PaddedUpperHex :: Word32 -> Builder 8 word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# (C.word32ToWord# w) --- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 8 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +hexadecimal, zero-padding the encoding to 8 digits. This uses +lowercase for the alphabetical digits. +-} word32PaddedLowerHex :: Word32 -> Builder 8 word32PaddedLowerHex (W32# w) = word32PaddedLowerHex# (C.word32ToWord# w) --- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 4 digits. This uses --- uppercase for the alphabetical digits. --- --- >>> word16PaddedUpperHex 0xab0 --- 0AB0 +{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal, zero-padding the encoding to 4 digits. This uses +uppercase for the alphabetical digits. + +>>> word16PaddedUpperHex 0xab0 +0AB0 +-} word16PaddedUpperHex :: Word16 -> Builder 4 word16PaddedUpperHex (W16# w) = word16PaddedUpperHex# (C.word16ToWord# w) --- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 4 digits. This uses --- lowercase for the alphabetical digits. --- --- >>> word16PaddedLowerHex 0xab0 --- 0ab0 +{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal, zero-padding the encoding to 4 digits. This uses +lowercase for the alphabetical digits. + +>>> word16PaddedLowerHex 0xab0 +0ab0 +-} word16PaddedLowerHex :: Word16 -> Builder 4 word16PaddedLowerHex (W16# w) = word16PaddedLowerHex# (C.word16ToWord# w) --- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. Letters are presented --- in lowercase. If the number is zero, a single zero digit is used. --- --- >>> word16LowerHex 0xab0 --- ab0 +{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. Letters are presented +in lowercase. If the number is zero, a single zero digit is used. + +>>> word16LowerHex 0xab0 +ab0 +-} word16LowerHex :: Word16 -> Builder 4 word16LowerHex (W16# w) = word16LowerHex# (C.word16ToWord# w) --- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. Letters are presented --- in uppercase. If the number is zero, a single zero digit is used. --- --- >>> word16UpperHex 0xab0 --- AB0 +{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. Letters are presented +in uppercase. If the number is zero, a single zero digit is used. + +>>> word16UpperHex 0xab0 +AB0 +-} word16UpperHex :: Word16 -> Builder 4 word16UpperHex (W16# w) = word16UpperHex# (C.word16ToWord# w) --- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. If the number is zero, --- a single zero digit is used. +{- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. If the number is zero, +a single zero digit is used. +-} word8LowerHex :: Word8 -> Builder 2 word8LowerHex (W8# w) = word8LowerHex# (C.word8ToWord# w) --- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 2 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal, zero-padding the encoding to 2 digits. This uses +uppercase for the alphabetical digits. +-} word8PaddedUpperHex :: Word8 -> Builder 2 word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# (C.word8ToWord# w) --- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 2 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal, zero-padding the encoding to 2 digits. This uses +lowercase for the alphabetical digits. +-} word8PaddedLowerHex :: Word8 -> Builder 2 word8PaddedLowerHex (W8# w) = word8PaddedLowerHex# (C.word8ToWord# w) -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedUpperHex# :: Word# -> Builder 16 -{-# noinline word64PaddedUpperHex# #-} +{-# NOINLINE word64PaddedUpperHex# #-} word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56)) @@ -575,13 +597,13 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word48PaddedLowerHex# :: Word# -> Builder 12 -{-# noinline word48PaddedLowerHex# #-} +{-# NOINLINE word48PaddedLowerHex# #-} word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 44)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 40)) @@ -596,13 +618,13 @@ word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 11) (toHexLower w) pure (off + 12) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedLowerHex# :: Word# -> Builder 16 -{-# noinline word64PaddedLowerHex# #-} +{-# NOINLINE word64PaddedLowerHex# #-} word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 56)) @@ -621,11 +643,11 @@ word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexLower (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# word32PaddedUpperHex# :: Word# -> Builder 8 -{-# noinline word32PaddedUpperHex# #-} +{-# NOINLINE word32PaddedUpperHex# #-} word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24)) @@ -636,11 +658,11 @@ word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# word32PaddedLowerHex# :: Word# -> Builder 8 -{-# noinline word32PaddedLowerHex# #-} +{-# NOINLINE word32PaddedLowerHex# #-} word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 24)) @@ -651,7 +673,7 @@ word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# -- Not sure if it is beneficial to inline this. We just let @@ -664,7 +686,7 @@ word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word16PaddedLowerHex# :: Word# -> Builder 4 @@ -674,7 +696,7 @@ word16PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word12PaddedLowerHex# :: Word# -> Builder 3 @@ -683,7 +705,7 @@ word12PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# word12PaddedUpperHex# :: Word# -> Builder 3 @@ -692,42 +714,42 @@ word12PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# -- Definitely want this to inline. It's maybe a dozen instructions total. word8PaddedUpperHex# :: Word# -> Builder 2 -{-# inline word8PaddedUpperHex# #-} +{-# INLINE word8PaddedUpperHex# #-} word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word8PaddedLowerHex# :: Word# -> Builder 2 -{-# inline word8PaddedLowerHex# #-} +{-# INLINE word8PaddedLowerHex# #-} word8PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word4PaddedLowerHex# :: Word# -> Builder 1 -{-# inline word4PaddedLowerHex# #-} +{-# INLINE word4PaddedLowerHex# #-} word4PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower w) pure (off + 1) - where + where w = W# w# word4PaddedUpperHex# :: Word# -> Builder 1 -{-# inline word4PaddedUpperHex# #-} +{-# INLINE word4PaddedUpperHex# #-} word4PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper w) pure (off + 1) - where + where w = W# w# word16UpperHex# :: Word# -> Builder 4 @@ -736,7 +758,7 @@ word16UpperHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedUpperHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedUpperHex# w#) | otherwise = word16PaddedUpperHex# w# - where + where w = W# w# word16LowerHex# :: Word# -> Builder 4 @@ -745,7 +767,7 @@ word16LowerHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedLowerHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedLowerHex# w#) | otherwise = word16PaddedLowerHex# w# - where + where w = W# w# -- Precondition: argument less than 256 @@ -753,17 +775,18 @@ word8LowerHex# :: Word# -> Builder 2 word8LowerHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedLowerHex# w#) | otherwise = weaken Lte.constant (word8PaddedLowerHex# w#) - where + where w = W# w# --- | Encode a number less than 100 as a decimal number, zero-padding it to --- two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and --- 73 is encoded as @73@. --- --- Precondition: Argument must be less than 100. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +{- | Encode a number less than 100 as a decimal number, zero-padding it to +two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and +73 is encoded as @73@. + +Precondition: Argument must be less than 100. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec2 :: Word -> Builder 2 wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do let d1 = approxDiv10 w @@ -772,172 +795,201 @@ wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (unsafeWordToWord8 (d2 + 48)) pure (off + 2) --- | Encode a number less than 10000 as a decimal number, zero-padding it to --- two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, --- and 73 is encoded as @0073@. --- --- Precondition: Argument must be less than 10000. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +{- | Encode a number less than 10000 as a decimal number, zero-padding it to +two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, +and 73 is encoded as @0073@. + +Precondition: Argument must be less than 10000. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec4 :: Word -> Builder 4 wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do putRem10 - (putRem10 $ putRem10 $ putRem10 - (\_ _ _ -> pure ()) - ) arr (off + 3) w + ( putRem10 $ + putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 3) + w pure (off + 4) --- | Encode a number less than 1e9 as a decimal number, zero-padding it to --- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as --- @000000005@. --- --- Precondition: Argument must be less than 1e9. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +wordPaddedDec3 :: Word -> Builder 3 +wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do + putRem10 + ( putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 2) + w + pure (off + 3) + +{- | Encode a number less than 1e9 as a decimal number, zero-padding it to +nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as +@000000005@. + +Precondition: Argument must be less than 1e9. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec9 :: Word -> Builder 9 wordPaddedDec9 !w = Unsafe.construct $ \arr off -> do putRem10 - (putRem10 $ putRem10 $ putRem10 $ putRem10 $ putRem10 $ - putRem10 $ putRem10 $ putRem10 - (\_ _ _ -> pure ()) - ) arr (off + 8) w + ( putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 8) + w pure (off + 9) putRem10 :: (MutableByteArray s -> Int -> Word -> ST s a) -> MutableByteArray s -> Int -> Word -> ST s a -{-# inline putRem10 #-} +{-# INLINE putRem10 #-} putRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) writeByteArray arr off (unsafeWordToWord8 (remainder + 48)) andThen arr (off - 1) quotient --- | Encode an ASCII character. --- Precondition: Input must be an ASCII character. This is not checked. +{- | Encode an ASCII character. +Precondition: Input must be an ASCII character. This is not checked. +-} ascii :: Char -> Builder 1 ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c) - pure (I# (off +# 1# )) + pure (I# (off +# 1#)) --- | Encode two ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode two ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii2 :: Char -> Char -> Builder 2 ascii2 (C# c0) (C# c1) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - pure (I# (off +# 2# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + pure (I# (off +# 2#)) --- | Encode three ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode three ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii3 :: Char -> Char -> Char -> Builder 3 ascii3 (C# c0) (C# c1) (C# c2) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - pure (I# (off +# 3# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + pure (I# (off +# 3#)) --- | Encode four ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode four ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii4 :: Char -> Char -> Char -> Char -> Builder 4 ascii4 (C# c0) (C# c1) (C# c2) (C# c3) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - pure (I# (off +# 4# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + pure (I# (off +# 4#)) --- | Encode five ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode five ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder 5 ascii5 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - pure (I# (off +# 5# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + pure (I# (off +# 5#)) --- | Encode six ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode six ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder 6 ascii6 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - pure (I# (off +# 6# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + pure (I# (off +# 6#)) --- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 7 ascii7 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - primitive_ (writeCharArray# arr (off +# 6# ) c6) - pure (I# (off +# 7# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + primitive_ (writeCharArray# arr (off +# 6#) c6) + pure (I# (off +# 7#)) --- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 8 ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - primitive_ (writeCharArray# arr (off +# 6# ) c6) - primitive_ (writeCharArray# arr (off +# 7# ) c7) - pure (I# (off +# 8# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + primitive_ (writeCharArray# arr (off +# 6#) c6) + primitive_ (writeCharArray# arr (off +# 7#) c7) + pure (I# (off +# 8#)) -- | Encode a machine-sized word with VLQ (also known as VByte, Varint, VInt). wordVlq :: Word -> Builder 10 -{-# inline wordVlq #-} +{-# INLINE wordVlq #-} wordVlq (W# w) = vlqCommon (W# w) -- | Encode a 32-bit word with VLQ (also known as VByte, Varint, VInt). word32Vlq :: Word32 -> Builder 5 -{-# inline word32Vlq #-} +{-# INLINE word32Vlq #-} word32Vlq (W32# w) = vlqCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with VLQ (also known as VByte, Varint, VInt). word64Vlq :: Word64 -> Builder 10 -{-# inline word64Vlq #-} -word64Vlq (W64# w) = vlqCommon (W# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif - ) +{-# INLINE word64Vlq #-} +word64Vlq (W64# w) = vlqCommon (W# (word64ToWord# w)) -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder 10 -{-# inline wordLEB128 #-} +{-# INLINE wordLEB128 #-} wordLEB128 (W# w) = lebCommon (W# w) +-- | Encode a 32-bit word with LEB-128. +word16LEB128 :: Word16 -> Builder 3 +{-# INLINE word16LEB128 #-} +word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w)) + -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder 5 -{-# inline word32LEB128 #-} +{-# INLINE word32LEB128 #-} word32LEB128 (W32# w) = lebCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder 10 -{-# inline word64LEB128 #-} -word64LEB128 (W64# w) = lebCommon (W# -#if MIN_VERSION_base(4,17,0) - (word64ToWord# w) -#else - w -#endif - ) +{-# INLINE word64LEB128 #-} +word64LEB128 (W64# w) = lebCommon (W# (word64ToWord# w)) vlqCommon :: Word -> Builder n vlqCommon !w = case w of @@ -947,23 +999,25 @@ vlqCommon !w = case w of in vlqStep startIx w vlqStep :: - Int -- start index, must be in range [0,63] and 7 must divide it evenly - -> Word - -> Builder n + Int -> -- start index, must be in range [0,63] and 7 must divide it evenly + Word -> + Builder n vlqStep !ix !w | ix <= 0 = unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .&. 0b0111_1111)) - | otherwise = unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) - (vlqStep (ix - 7) w) + | otherwise = + unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) + (vlqStep (ix - 7) w) lebCommon :: Word -> Builder n lebCommon !w = case quotRem w 128 of - (q,r) -> case q of + (q, r) -> case q of 0 -> unsafeWord8 (unsafeWordToWord8 r) - _ -> unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) - (lebCommon q) + _ -> + unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) + (lebCommon q) -- | Encode a character as UTF-8. This only uses as much space as is required. char :: Char -> Builder 4 @@ -972,68 +1026,62 @@ char c writeByteArray arr off (unsafeWordToWord8 codepoint) pure (off + 1) | codepoint < 0x800 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint)) return (off + 2) | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do -- Codepoint U+FFFD - writeByteArray arr off (0xEF :: Word8) + writeByteArray arr off (0xEF :: Word8) writeByteArray arr (off + 1) (0xBF :: Word8) writeByteArray arr (off + 2) (0xBD :: Word8) return (off + 3) | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint)) return (off + 3) | otherwise = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint)) writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint)) return (off + 4) + where + codepoint :: Word + codepoint = fromIntegral (ord c) - where - codepoint :: Word - codepoint = fromIntegral (ord c) + -- precondition: codepoint is less than 0x800 + byteTwoOne :: Word -> Word + byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 - -- precondition: codepoint is less than 0x800 - byteTwoOne :: Word -> Word - byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 + byteTwoTwo :: Word -> Word + byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 - byteTwoTwo :: Word -> Word - byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 + -- precondition: codepoint is less than 0x1000 + byteThreeOne :: Word -> Word + byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 - -- precondition: codepoint is less than 0x1000 - byteThreeOne :: Word -> Word - byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 + byteThreeTwo :: Word -> Word + byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - byteThreeTwo :: Word -> Word - byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 + byteThreeThree :: Word -> Word + byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 - byteThreeThree :: Word -> Word - byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 + -- precondition: codepoint is less than 0x110000 + byteFourOne :: Word -> Word + byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 - -- precondition: codepoint is less than 0x110000 - byteFourOne :: Word -> Word - byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 + byteFourTwo :: Word -> Word + byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 - byteFourTwo :: Word -> Word - byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 + byteFourThree :: Word -> Word + byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - byteFourThree :: Word -> Word - byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - - byteFourFour :: Word -> Word - byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 + byteFourFour :: Word -> Word + byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 int64BE :: Int64 -> Builder 8 -int64BE (I64# i) = word64BE (W64# ( -#if MIN_VERSION_base(4,17,0) - wordToWord64# (int2Word# (int64ToInt# i)))) -#else - int2Word# i)) -#endif +int64BE (I64# i) = word64BE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) int32BE :: Int32 -> Builder 4 int32BE (I32# i) = word32BE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1042,13 +1090,7 @@ int16BE :: Int16 -> Builder 2 int16BE (I16# i) = word16BE (W16# (C.wordToWord16# (int2Word# (C.int16ToInt# i)))) int64LE :: Int64 -> Builder 8 -int64LE (I64# i) = word64LE (W64# ( -#if MIN_VERSION_base(4,17,0) - wordToWord64# (int2Word# (int64ToInt# i)))) -#else - int2Word# i)) -#endif - +int64LE (I64# i) = word64LE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) int32LE :: Int32 -> Builder 4 int32LE (I32# i) = word32LE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1068,8 +1110,9 @@ word256LE (Word256 hi mhi mlo lo) = word64LE lo `append` word64LE mlo `append` w word256BE :: Word256 -> Builder 32 word256BE (Word256 hi mhi mlo lo) = word64BE hi `append` word64BE mhi `append` word64BE mlo `append` word64BE lo --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a little-endian fashion. +-} word64LE :: Word64 -> Builder 8 word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) @@ -1079,14 +1122,15 @@ word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word64 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word64 @Word8 w) pure (off + 8) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a big-endian fashion. +-} word64BE :: Word64 -> Builder 8 word64BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) + writeByteArray arr (off) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32)) @@ -1096,39 +1140,43 @@ word64BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w) pure (off + 8) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a little-endian fashion. +-} word32LE :: Word32 -> Builder 4 word32LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word32 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word32 @Word8 w) pure (off + 4) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a big-endian fashion. +-} word32BE :: Word32 -> Builder 4 word32BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) + writeByteArray arr (off) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w) pure (off + 4) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a little-endian fashion. +-} word16LE :: Word16 -> Builder 2 word16LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word16 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word16 @Word8 w) pure (off + 2) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a big-endian fashion. +-} word16BE :: Word16 -> Builder 2 word16BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) + writeByteArray arr (off) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w) pure (off + 2) @@ -1145,16 +1193,18 @@ unsafeWord8 w = Unsafe.construct $ \arr off -> do -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# inline reverseBytes #-} -reverseBytes arr begin end = go begin end where - go ixA ixB = if ixA < ixB - then do - a :: Word8 <- readByteArray arr ixA - b :: Word8 <- readByteArray arr ixB - writeByteArray arr ixA b - writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# INLINE reverseBytes #-} +reverseBytes arr begin end = go begin end + where + go ixA ixB = + if ixA < ixB + then do + a :: Word8 <- readByteArray arr ixA + b :: Word8 <- readByteArray arr ixB + writeByteArray arr ixA b + writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () c2w :: Char -> Word8 c2w = fromIntegral . ord @@ -1168,8 +1218,13 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) = -- inaccurate. This is very visible when encoding a number like 2.25, which -- is perfectly represented as an IEEE 754 floating point number but is goofed -- up by this function. -doubleDec# :: forall s. - Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) +doubleDec# :: + forall s. + Double# -> + MutableByteArray# s -> + Int# -> + State# s -> + (# State# s, Int# #) doubleDec# d# marr# off# s0 = case unsafeIOToST (c_paste_double marr# off# d#) of ST f -> case f s0 of @@ -1183,5 +1238,6 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) -foreign import ccall unsafe "bytebuild_paste_double" c_paste_double :: - MutableByteArray# s -> Int# -> Double# -> IO Int +foreign import ccall unsafe "bytebuild_paste_double" + c_paste_double :: + MutableByteArray# s -> Int# -> Double# -> IO Int diff --git a/src/Data/Bytes/Builder/Bounded/Class.hs b/src/Data/Bytes/Builder/Bounded/Class.hs index 9a630d1..29c16e5 100644 --- a/src/Data/Bytes/Builder/Bounded/Class.hs +++ b/src/Data/Bytes/Builder/Bounded/Class.hs @@ -1,8 +1,8 @@ -{-# language DataKinds #-} -{-# language TypeFamilies #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} module Data.Bytes.Builder.Bounded.Class - ( ToBoundedBuilder(..) + ( ToBoundedBuilder (..) ) where import Data.Int @@ -11,14 +11,15 @@ import Data.Word import qualified Data.Bytes.Builder.Bounded as Bounded import qualified GHC.TypeNats as GHC --- | Variant of To that can be encoded as a builder. Human-readable encodings --- are used when possible. For example, numbers are encoded an ascii-encoded --- decimal characters. UTF-8 is preferred for textual types. For types --- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes --- are preserved. --- --- The goal of this typeclass is to reduce the size of builders produced --- by quasiquotation. +{- | Variant of To that can be encoded as a builder. Human-readable encodings +are used when possible. For example, numbers are encoded an ascii-encoded +decimal characters. UTF-8 is preferred for textual types. For types +that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +are preserved. + +The goal of this typeclass is to reduce the size of builders produced +by quasiquotation. +-} class ToBoundedBuilder a where type BoundedBuilderLength a :: GHC.Nat toBuilder :: a -> Bounded.Builder (BoundedBuilderLength a) diff --git a/src/Data/Bytes/Builder/Bounded/Unsafe.hs b/src/Data/Bytes/Builder/Bounded/Unsafe.hs index 3c812bc..c983e04 100644 --- a/src/Data/Bytes/Builder/Bounded/Unsafe.hs +++ b/src/Data/Bytes/Builder/Bounded/Unsafe.hs @@ -1,62 +1,66 @@ -{-# language DataKinds #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language MagicHash #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder.Bounded.Unsafe ( -- * Types - Builder(..) + Builder (..) + -- * Construct , construct + -- * Run , pasteST , pasteIO ) where import Data.Kind (Type) -import Data.Primitive (MutableByteArray(..)) -import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#) +import Data.Primitive (MutableByteArray (..)) +import GHC.Exts (Int (I#), Int#, MutableByteArray#, RealWorld, State#) import GHC.IO (stToIO) -import GHC.ST (ST(ST)) +import GHC.ST (ST (ST)) import GHC.TypeLits (Nat) --- | A builder parameterized by the maximum number of bytes it uses --- when executed. +{- | A builder parameterized by the maximum number of bytes it uses +when executed. +-} newtype Builder :: Nat -> Type where - Builder :: - (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) - -- ^ This function takes a buffer, an offset, and a number of remaining bytes. - -- It returns the new offset. - -> Builder n - + Builder :: + -- | This function takes a buffer, an offset, and a number of remaining bytes. + -- It returns the new offset. + (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> + Builder n --- | Constructor for 'Builder' that works on a function with lifted --- arguments instead of unlifted ones. This is just as unsafe as the --- actual constructor. +{- | Constructor for 'Builder' that works on a function with lifted +arguments instead of unlifted ones. This is just as unsafe as the +actual constructor. +-} construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n -{-# inline construct #-} -construct f = Builder - $ \arr off s0 -> +{-# INLINE construct #-} +construct f = Builder $ + \arr off s0 -> case unST (f (MutableByteArray arr) (I# off)) s0 of (# s1, (I# n) #) -> (# s1, n #) --- | This function does not enforce the known upper bound on the --- size. It is up to the user to do this. +{- | This function does not enforce the known upper bound on the +size. It is up to the user to do this. +-} pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int -{-# inline pasteST #-} +{-# INLINE pasteST #-} pasteST (Builder f) (MutableByteArray arr) (I# off) = ST $ \s0 -> case f arr off s0 of (# s1, r #) -> (# s1, (I# r) #) --- | This function does not enforce the known upper bound on the --- size. It is up to the user to do this. +{- | This function does not enforce the known upper bound on the +size. It is up to the user to do this. +-} pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int -{-# inline pasteIO #-} +{-# INLINE pasteIO #-} pasteIO b m off = stToIO (pasteST b m off) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f - diff --git a/src/Data/Bytes/Builder/Class.hs b/src/Data/Bytes/Builder/Class.hs index bc9440c..fcaf303 100644 --- a/src/Data/Bytes/Builder/Class.hs +++ b/src/Data/Bytes/Builder/Class.hs @@ -1,13 +1,12 @@ {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TypeSynonymInstances #-} module Data.Bytes.Builder.Class - ( ToBuilder(..) + ( ToBuilder (..) ) where +import Data.ByteString.Short (ShortByteString) import Data.Bytes (Bytes) import Data.Bytes.Builder (Builder) -import Data.ByteString.Short (ShortByteString) import Data.Int import Data.Primitive.ByteArray (ByteArray) import Data.Text.Short (ShortText) @@ -15,14 +14,15 @@ import Data.Word import qualified Data.Bytes.Builder as Builder --- | Types that can be encoded as a builder. Human-readable encodings --- are used when possible. For example, numbers are encoded an ascii-encoded --- decimal characters. UTF-8 is preferred for textual types. For types --- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes --- are preserved. --- --- The goal of this typeclass is to reduce the size of builders produced --- by quasiquotation. +{- | Types that can be encoded as a builder. Human-readable encodings +are used when possible. For example, numbers are encoded an ascii-encoded +decimal characters. UTF-8 is preferred for textual types. For types +that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +are preserved. + +The goal of this typeclass is to reduce the size of builders produced +by quasiquotation. +-} class ToBuilder a where toBuilder :: a -> Builder diff --git a/src/Data/Bytes/Builder/Template.hs b/src/Data/Bytes/Builder/Template.hs index 1fd775c..f2b95dd 100644 --- a/src/Data/Bytes/Builder/Template.hs +++ b/src/Data/Bytes/Builder/Template.hs @@ -9,50 +9,53 @@ module Data.Bytes.Builder.Template import Control.Monad (when) import Data.Bytes.Builder.Class (toBuilder) -import GHC.Ptr (Ptr(Ptr)) +import GHC.Ptr (Ptr (Ptr)) import Language.Haskell.Meta.Parse (parseExp) -import Language.Haskell.TH (Q,Exp) -import Language.Haskell.TH.Lib (integerL,stringPrimL,litE) -import Language.Haskell.TH.Quote (QuasiQuoter(..)) +import Language.Haskell.TH (Exp, Q) +import Language.Haskell.TH.Lib (integerL, litE, stringPrimL) +import Language.Haskell.TH.Quote (QuasiQuoter (..)) -import qualified Data.Bytes.Builder as Builder import qualified Data.ByteString.Short as SBS +import qualified Data.Bytes.Builder as Builder import qualified Data.Text.Short as TS import qualified Language.Haskell.TH as TH --- | A quasiquoter for builders. Haskell expressions are interpolated --- with backticks, and the @ToBuilder@ class is used to convert them --- to builders. Several common escape sequences for whitespace and --- control characters are recongized. Consider the following expression, --- where the binding @partition@ has type @Word32@: --- --- > [templ|[WARN] Partition `partition` has invalid data.\n|] --- --- This expression has type @Builder@ and expands to: --- --- > Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> --- > Builder.toBuilder partition <> --- > Builder.cstringLen (Ptr " has invalid data.\n"#, 19) --- --- The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this --- would result in the following if @partition@ was 42 (with a newline --- character at the end): --- --- > [WARN] Partition 42 has invalid data. --- --- In the future, a more sophisticated @bbldr@ variant will be added --- that will support expressions where the maximum length of the entire --- builder can be computed at compile time. +{- | A quasiquoter for builders. Haskell expressions are interpolated +with backticks, and the @ToBuilder@ class is used to convert them +to builders. Several common escape sequences for whitespace and +control characters are recongized. Consider the following expression, +where the binding @partition@ has type @Word32@: + +> [templ|[WARN] Partition `partition` has invalid data.\n|] + +This expression has type @Builder@ and expands to: + +> Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> +> Builder.toBuilder partition <> +> Builder.cstringLen (Ptr " has invalid data.\n"#, 19) + +The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this +would result in the following if @partition@ was 42 (with a newline +character at the end): + +> [WARN] Partition 42 has invalid data. + +In the future, a more sophisticated @bbldr@ variant will be added +that will support expressions where the maximum length of the entire +builder can be computed at compile time. +-} bldr :: QuasiQuoter -bldr = QuasiQuoter - { quoteExp = templExp - , quotePat = notHandled "patterns" - , quoteType = notHandled "types" - , quoteDec = notHandled "declarations" - } - where - notHandled things _ = fail $ - things ++ "are not handled by the byte template quasiquoter" +bldr = + QuasiQuoter + { quoteExp = templExp + , quotePat = notHandled "patterns" + , quoteType = notHandled "types" + , quoteDec = notHandled "declarations" + } + where + notHandled things _ = + fail $ + things ++ "are not handled by the byte template quasiquoter" templExp :: String -> Q Exp templExp inp = do @@ -62,7 +65,7 @@ templExp inp = do Right [] -> fail "empty template" Right v -> pure v let expParts = compile <$> rawParts - foldl1 (\e1 e2 -> [| $e1 <> $e2 |]) expParts + foldl1 (\e1 e2 -> [|$e1 <> $e2|]) expParts checkOverloadedStrings :: Q () checkOverloadedStrings = do @@ -87,40 +90,40 @@ compile (Splice str) = case parseExp str of parse :: String -> Either String Template parse = partsLoop - where + where partsLoop "" = do pure [] - partsLoop ('`':inp) = do + partsLoop ('`' : inp) = do (!spl, !rest) <- spliceLoop inp - (Splice spl:) <$> partsLoop rest + (Splice spl :) <$> partsLoop rest partsLoop inp = do (!lit, !rest) <- litLoop "" inp - (Literal lit:) <$> partsLoop rest + (Literal lit :) <$> partsLoop rest litLoop :: String -> String -> Either String (String, String) litLoop !acc rest@"" = pure (reverse acc, rest) - litLoop !acc rest@('`':_) = pure (reverse acc, rest) - litLoop !acc ('\\':next) = do + litLoop !acc rest@('`' : _) = pure (reverse acc, rest) + litLoop !acc ('\\' : next) = do (c, rest) <- parseEscape next - litLoop (c:acc) rest - litLoop !acc (c:rest) = litLoop (c:acc) rest + litLoop (c : acc) rest + litLoop !acc (c : rest) = litLoop (c : acc) rest spliceLoop :: String -> Either String (String, String) spliceLoop inp = case break (== '`') inp of ([], _) -> Left "internal error" - (hs, '`':rest) -> pure (hs, rest) - (_, _:_) -> Left "internal error" + (hs, '`' : rest) -> pure (hs, rest) + (_, _ : _) -> Left "internal error" (_, []) -> Left "unterminated interpolation" parseEscape :: String -> Either String (Char, String) parseEscape "" = Left "incomplete escape" - parseEscape ('\\':rest) = pure ('\\', rest) - parseEscape ('`':rest) = pure ('`', rest) - parseEscape ('\'':rest) = pure ('\'', rest) - parseEscape ('\"':rest) = pure ('\"', rest) - parseEscape ('0':rest) = pure ('\0', rest) - parseEscape ('a':rest) = pure ('\a', rest) - parseEscape ('b':rest) = pure ('\b', rest) - parseEscape ('f':rest) = pure ('\f', rest) - parseEscape ('n':rest) = pure ('\n', rest) - parseEscape ('r':rest) = pure ('\r', rest) - parseEscape ('t':rest) = pure ('\t', rest) - parseEscape ('v':rest) = pure ('\v', rest) - parseEscape (c:_) = Left $ "unrecognized escape: \\" ++ [c] + parseEscape ('\\' : rest) = pure ('\\', rest) + parseEscape ('`' : rest) = pure ('`', rest) + parseEscape ('\'' : rest) = pure ('\'', rest) + parseEscape ('\"' : rest) = pure ('\"', rest) + parseEscape ('0' : rest) = pure ('\0', rest) + parseEscape ('a' : rest) = pure ('\a', rest) + parseEscape ('b' : rest) = pure ('\b', rest) + parseEscape ('f' : rest) = pure ('\f', rest) + parseEscape ('n' : rest) = pure ('\n', rest) + parseEscape ('r' : rest) = pure ('\r', rest) + parseEscape ('t' : rest) = pure ('\t', rest) + parseEscape ('v' : rest) = pure ('\v', rest) + parseEscape (c : _) = Left $ "unrecognized escape: \\" ++ [c] diff --git a/src/Data/Bytes/Builder/Unsafe.hs b/src/Data/Bytes/Builder/Unsafe.hs index ab3a29d..bcfbdf8 100644 --- a/src/Data/Bytes/Builder/Unsafe.hs +++ b/src/Data/Bytes/Builder/Unsafe.hs @@ -1,55 +1,60 @@ -{-# language BangPatterns #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder.Unsafe ( -- * Types - Builder(..) - , BuilderState(..) - , Commits(..) + Builder (..) + , BuilderState (..) + , Commits (..) + -- * Execution , pasteST , pasteIO + -- * Construction , fromEffect + -- * Builder State , newBuilderState , closeBuilderState + -- * Finalization , reverseCommitsOntoChunks , commitsOntoChunks , copyReverseCommits , addCommitsLength + -- * Commit Distance , commitDistance , commitDistance1 + -- * Safe Functions + -- | These functions are actually completely safe, but they are defined -- here because they are used by typeclass instances. Import them from -- @Data.Bytes.Builder@ instead. , stringUtf8 , cstring + -- * Pasting with Preconditions , pasteUtf8TextJson# ) where import Control.Monad.Primitive (primitive_) -import Data.Bytes.Chunks (Chunks(ChunksCons)) -import Data.Bytes.Types (Bytes(Bytes)) +import Data.Bytes.Chunks (Chunks (ChunksCons)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (MutableByteArray(..),ByteArray(..)) +import Data.Primitive (ByteArray (..), MutableByteArray (..)) import Data.Word (Word8) import Foreign.C.String (CString) -import GHC.Base (unpackCString#,unpackCStringUtf8#) -import GHC.Exts ((-#),(+#),(>#),(>=#),Char(C#)) -import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr)) -import GHC.Exts (RealWorld,IsString,Int#,State#) +import GHC.Base (unpackCString#, unpackCStringUtf8#) +import GHC.Exts (Addr#, ByteArray#, Char (C#), Int (I#), Int#, IsString, MutableByteArray#, Ptr (Ptr), RealWorld, State#, (+#), (-#), (>#), (>=#)) import GHC.IO (stToIO) -import GHC.ST (ST(ST)) +import GHC.ST (ST (ST)) import qualified Compat as C import qualified Data.Bytes.Builder.Bounded as Bounded @@ -58,46 +63,52 @@ import qualified Data.Primitive as PM import qualified GHC.Exts as Exts import qualified Op --- | An unmaterialized sequence of bytes that may be pasted --- into a mutable byte array. +{- | An unmaterialized sequence of bytes that may be pasted +into a mutable byte array. +-} newtype Builder - = Builder (forall s. - MutableByteArray# s -> -- buffer we are currently writing to - Int# -> -- offset into the current buffer - Int# -> -- number of bytes remaining in the current buffer - Commits s -> -- buffers and immutable byte slices that we have already committed - State# s -> - (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things - ) + = Builder + ( forall s. + MutableByteArray# s -> -- buffer we are currently writing to + Int# -> -- offset into the current buffer + Int# -> -- number of bytes remaining in the current buffer + Commits s -> -- buffers and immutable byte slices that we have already committed + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things + ) --- | A list of committed chunks along with the chunk currently being --- written to. This is kind of like a non-empty variant of 'Commmits' --- but with the additional invariant that the head chunk is a mutable --- byte array. -data BuilderState s = BuilderState - (MutableByteArray# s) -- buffer we are currently writing to - Int# -- offset into the current buffer - Int# -- number of bytes remaining in the current buffer - !(Commits s) -- buffers and immutable byte slices that are already committed +{- | A list of committed chunks along with the chunk currently being +written to. This is kind of like a non-empty variant of 'Commmits' +but with the additional invariant that the head chunk is a mutable +byte array. +-} +data BuilderState s + = BuilderState + (MutableByteArray# s) -- buffer we are currently writing to + Int# -- offset into the current buffer + Int# -- number of bytes remaining in the current buffer + !(Commits s) -- buffers and immutable byte slices that are already committed -- | Create an empty 'BuilderState' with a buffer of the given size. newBuilderState :: Int -> ST s (BuilderState s) -{-# inline newBuilderState #-} -newBuilderState n@(I# n# ) = do +{-# INLINE newBuilderState #-} +newBuilderState n@(I# n#) = do MutableByteArray buf <- PM.newByteArray n pure (BuilderState buf 0# n# Initial) --- | Push the active chunk onto the top of the commits. --- The @BuilderState@ argument must not be reused after being passed --- to this function. That is, its use must be affine. +{- | Push the active chunk onto the top of the commits. +The @BuilderState@ argument must not be reused after being passed +to this function. That is, its use must be affine. +-} closeBuilderState :: BuilderState s -> Commits s closeBuilderState (BuilderState dst off _ cmts) = Mutable dst off cmts --- | Run a builder, performing an in-place update on the state. --- The @BuilderState@ argument must not be reused after being passed --- to this function. That is, its use must be affine. +{- | Run a builder, performing an in-place update on the state. +The @BuilderState@ argument must not be reused after being passed +to this function. That is, its use must be affine. +-} pasteST :: Builder -> BuilderState s -> ST s (BuilderState s) -{-# inline pasteST #-} +{-# INLINE pasteST #-} pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> case f buf off len cmts s0 of (# s1, buf1, off1, len1, cmts1 #) -> @@ -105,48 +116,54 @@ pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> -- | Variant of 'pasteST' that runs in 'IO'. pasteIO :: Builder -> BuilderState RealWorld -> IO (BuilderState RealWorld) -{-# inline pasteIO #-} +{-# INLINE pasteIO #-} pasteIO b st = stToIO (pasteST b st) instance IsString Builder where - {-# inline fromString #-} + {-# INLINE fromString #-} fromString = stringUtf8 instance Semigroup Builder where - {-# inline (<>) #-} + {-# INLINE (<>) #-} Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of (# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1 instance Monoid Builder where - {-# inline mempty #-} + {-# INLINE mempty #-} mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #) data Commits s = Mutable + -- | Mutable buffer, start index implicitly zero (MutableByteArray# s) - -- ^ Mutable buffer, start index implicitly zero - Int# -- ^ Length (may be smaller than actual length) + -- | Length (may be smaller than actual length) + Int# !(Commits s) | Immutable - ByteArray# -- ^ Immutable chunk - Int# -- ^ Offset into chunk, not necessarily zero - Int# -- ^ Length (may be smaller than actual length) + -- | Immutable chunk + ByteArray# + -- | Offset into chunk, not necessarily zero + Int# + -- | Length (may be smaller than actual length) + Int# !(Commits s) | Initial --- | Add the total number of bytes in the commits to first --- argument. +{- | Add the total number of bytes in the commits to first +argument. +-} addCommitsLength :: Int -> Commits s -> Int addCommitsLength !acc Initial = acc addCommitsLength !acc (Immutable _ _ x cs) = addCommitsLength (acc + I# x) cs addCommitsLength !acc (Mutable _ x cs) = addCommitsLength (acc + I# x) cs --- | Cons the chunks from a list of @Commits@ onto an initial --- @Chunks@ list (this argument is often @ChunksNil@). This reverses --- the order of the chunks, which is desirable since builders assemble --- @Commits@ with the chunks backwards. This performs an in-place shrink --- and freezes any mutable byte arrays it encounters. Consequently, --- these must not be reused. +{- | Cons the chunks from a list of @Commits@ onto an initial +@Chunks@ list (this argument is often @ChunksNil@). This reverses +the order of the chunks, which is desirable since builders assemble +@Commits@ with the chunks backwards. This performs an in-place shrink +and freezes any mutable byte arrays it encounters. Consequently, +these must not be reused. +-} reverseCommitsOntoChunks :: Chunks -> Commits s -> ST s Chunks reverseCommitsOntoChunks !xs Initial = pure xs reverseCommitsOntoChunks !xs (Immutable arr off len cs) = @@ -159,17 +176,18 @@ reverseCommitsOntoChunks !xs (Mutable buf len cs) = case len of arr <- PM.unsafeFreezeByteArray (MutableByteArray buf) reverseCommitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs --- | Variant of 'reverseCommitsOntoChunks' that does not reverse --- the order of the commits. Since commits are built backwards by --- consing, this means that the chunks appended to the front will --- be backwards. Within each chunk, however, the bytes will be in --- the correct order. --- --- Unlike 'reverseCommitsOntoChunks', this function is not tail --- recursive. +{- | Variant of 'reverseCommitsOntoChunks' that does not reverse +the order of the commits. Since commits are built backwards by +consing, this means that the chunks appended to the front will +be backwards. Within each chunk, however, the bytes will be in +the correct order. + +Unlike 'reverseCommitsOntoChunks', this function is not tail +recursive. +-} commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks commitsOntoChunks !xs0 cs0 = go cs0 - where + where go Initial = pure xs0 go (Immutable arr off len cs) = do xs <- go cs @@ -183,53 +201,65 @@ commitsOntoChunks !xs0 cs0 = go cs0 xs <- go cs pure $! ChunksCons (Bytes arr 0 (I# len)) xs --- | Copy the contents of the chunks into a mutable array, reversing --- the order of the chunks. --- Precondition: The destination must have enough space to house the --- contents. This is not checked. +{- | Copy the contents of the chunks into a mutable array, reversing +the order of the chunks. +Precondition: The destination must have enough space to house the +contents. This is not checked. +-} copyReverseCommits :: - MutableByteArray s -- ^ Destination - -> Int -- ^ Destination range successor - -> Commits s -- ^ Source - -> ST s Int -{-# inline copyReverseCommits #-} -copyReverseCommits (MutableByteArray dst) (I# off) cs = ST - (\s0 -> case copyReverseCommits# dst off cs s0 of - (# s1, nextOff #) -> (# s1, I# nextOff #) - ) + -- | Destination + MutableByteArray s -> + -- | Destination range successor + Int -> + -- | Source + Commits s -> + ST s Int +{-# INLINE copyReverseCommits #-} +copyReverseCommits (MutableByteArray dst) (I# off) cs = + ST + ( \s0 -> case copyReverseCommits# dst off cs s0 of + (# s1, nextOff #) -> (# s1, I# nextOff #) + ) copyReverseCommits# :: - MutableByteArray# s - -> Int# - -> Commits s - -> State# s - -> (# State# s, Int# #) + MutableByteArray# s -> + Int# -> + Commits s -> + State# s -> + (# State# s, Int# #) copyReverseCommits# _ off Initial s0 = (# s0, off #) copyReverseCommits# marr prevOff (Mutable arr sz cs) s0 = - let !off = prevOff -# sz in - case Op.copyMutableByteArray# arr 0# marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz + in case Op.copyMutableByteArray# arr 0# marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 copyReverseCommits# marr prevOff (Immutable arr soff sz cs) s0 = - let !off = prevOff -# sz in - case Op.copyByteArray# arr soff marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz + in case Op.copyByteArray# arr soff marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 --- | Create a builder from a cons-list of 'Char'. These --- must be UTF-8 encoded. +{- | Create a builder from a cons-list of 'Char'. These +must be UTF-8 encoded. +-} stringUtf8 :: String -> Builder -{-# inline stringUtf8 #-} +{-# INLINE stringUtf8 #-} stringUtf8 cs = Builder (goString cs) --- | Create a builder from a @NUL@-terminated 'CString'. This ignores any --- textual encoding, copying bytes until @NUL@ is reached. +{- | Create a builder from a @NUL@-terminated 'CString'. This ignores any +textual encoding, copying bytes until @NUL@ is reached. +-} cstring :: CString -> Builder -{-# inline cstring #-} +{-# INLINE cstring #-} cstring (Ptr cs) = Builder (goCString cs) -goString :: String - -> MutableByteArray# s -> Int# -> Int# -> Commits s - -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -{-# noinline goString #-} +goString :: + String -> + MutableByteArray# s -> + Int# -> + Int# -> + Commits s -> + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) +{-# NOINLINE goString #-} goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #) goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of 1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of @@ -245,39 +275,53 @@ goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of -- used Modified UTF-8. {-# RULES "Builder stringUtf8/cstring" forall s a b c d e. - goString (unpackCString# s) a b c d e = goCString s a b c d e + goString (unpackCString# s) a b c d e = + goCString s a b c d e "Builder stringUtf8/cstring-utf8" forall s a b c d e. - goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e -#-} + goString (unpackCStringUtf8# s) a b c d e = + goCString s a b c d e + #-} -goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s - -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) +goCString :: + Addr# -> + MutableByteArray# s -> + Int# -> + Int# -> + Commits s -> + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) goCString addr buf0 off0 len0 cs0 s0 = case C.word8ToWord# (Exts.indexWord8OffAddr# addr 0#) of 0## -> (# s0, buf0, off0, len0, cs0 #) w -> case len0 of 0# -> case Exts.newByteArray# 4080# s0 of (# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# (C.wordToWord8# w) s1 of - s2 -> goCString - (Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# ) - (Mutable buf0 off0 cs0) - s2 + s2 -> + goCString + (Exts.plusAddr# addr 1#) + buf1 + 1# + (4080# -# 1#) + (Mutable buf0 off0 cs0) + s2 _ -> case Exts.writeWord8Array# buf0 off0 (C.wordToWord8# w) s0 of - s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1 + s1 -> goCString (Exts.plusAddr# addr 1#) buf0 (off0 +# 1#) (len0 -# 1#) cs0 s1 fromEffect :: - Int -- ^ Maximum number of bytes the paste function needs - -> (forall s. MutableByteArray s -> Int -> ST s Int) - -- ^ Paste function. Takes a byte array and an offset and returns - -- the new offset and having pasted into the buffer. - -> Builder -{-# inline fromEffect #-} + -- | Maximum number of bytes the paste function needs + Int -> + -- | Paste function. Takes a byte array and an offset and returns + -- the new offset and having pasted into the buffer. + (forall s. MutableByteArray s -> Int -> ST s Int) -> + Builder +{-# INLINE fromEffect #-} fromEffect (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -288,24 +332,26 @@ shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray (MutableByteArray arr) (I# sz) = primitive_ (Exts.shrinkMutableByteArray# arr sz) --- | Variant of commitDistance where you get to supply a --- head of the commit list that has not yet been committed. +{- | Variant of commitDistance where you get to supply a +head of the commit list that has not yet been committed. +-} commitDistance1 :: - MutableByteArray# s -- target - -> Int# -- offset into target - -> MutableByteArray# s -- head of array - -> Int# -- offset into head of array - -> Commits s - -> Int# + MutableByteArray# s -> -- target + Int# -> -- offset into target + MutableByteArray# s -> -- head of array + Int# -> -- offset into head of array + Commits s -> + Int# commitDistance1 target offTarget buf0 offBuf cs = case Exts.sameMutableByteArray# target buf0 of 1# -> offBuf -# offTarget _ -> commitDistance target offBuf cs -# offTarget --- | Compute the number of bytes between the last byte and the offset --- specified in a chunk. Precondition: the chunk must exist in the --- list of committed chunks. This relies on mutable byte arrays having --- identity (e.g. it uses @sameMutableByteArray#@). +{- | Compute the number of bytes between the last byte and the offset +specified in a chunk. Precondition: the chunk must exist in the +list of committed chunks. This relies on mutable byte arrays having +identity (e.g. it uses @sameMutableByteArray#@). +-} commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int# commitDistance !_ !_ Initial = errorWithoutStackTrace "chunkDistance: chunk not found" commitDistance target !n (Immutable _ _ len cs) = @@ -315,46 +361,59 @@ commitDistance target !n (Mutable buf len cs) = 1# -> n +# len _ -> commitDistance target (n +# len) cs --- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. --- This escapes all characters with code points below @0x20@. --- --- * Precondition: The slice of the byte argument is UTF-8 encoded text. --- * Precondition: There is enough space in the buffer for the result --- to be written to. A simple way to ensure enough space is to allocate --- @3N + 2@ bytes, where N is the length of the argument. However, the --- caller may use clever heuristics to find a lower upper bound. --- * Result: The next offset in the destination buffer +{- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. +This escapes all characters with code points below @0x20@. + +* Precondition: The slice of the byte argument is UTF-8 encoded text. +* Precondition: There is enough space in the buffer for the result + to be written to. A simple way to ensure enough space is to allocate + @6N + 2@ bytes, where N is the length of the argument. However, the + caller may use clever heuristics to find a lower upper bound. +* Result: The next offset in the destination buffer +-} pasteUtf8TextJson# :: - ByteArray# -- ^ source - -> Int# -- ^ source offset - -> Int# -- ^ source length - -> MutableByteArray# s -- ^ destination buffer - -> Int# -- ^ offset into destination buffer - -> State# s -- ^ state token - -> (# State# s, Int# #) -- returns next destination offset -{-# noinline pasteUtf8TextJson# #-} + -- | source + ByteArray# -> + -- | source offset + Int# -> + -- | source length + Int# -> + -- | destination buffer + MutableByteArray# s -> + -- | offset into destination buffer + Int# -> + -- | state token + State# s -> + (# State# s, Int# #) -- returns next destination offset +{-# NOINLINE pasteUtf8TextJson# #-} pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = let ST f = do let dst = MutableByteArray dst# let doff0 = I# doff0# PM.writeByteArray dst doff0 (c2w '"') - let go !soff !slen !doff = if slen > 0 - then case indexChar8Array (ByteArray src#) soff of - '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) - '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) - c -> if c >= '\x20' - then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) - else case c of - '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) - '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) - '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) - _ -> do - write2 dst doff '\\' 'u' - doff' <- UnsafeBounded.pasteST - (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) - dst (doff + 2) - go (soff + 1) (slen - 1) doff' - else pure doff + let go !soff !slen !doff = + if slen > 0 + then case indexChar8Array (ByteArray src#) soff of + '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) + '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) + c -> + if c >= '\x20' + then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) + else case c of + '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) + '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) + '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) + '\b' -> write2 dst doff '\\' 'b' *> go (soff + 1) (slen - 1) (doff + 2) + '\f' -> write2 dst doff '\\' 'f' *> go (soff + 1) (slen - 1) (doff + 2) + _ -> do + write2 dst doff '\\' 'u' + doff' <- + UnsafeBounded.pasteST + (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) + dst + (doff + 2) + go (soff + 1) (slen - 1) doff' + else pure doff doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1) PM.writeByteArray dst doffRes (c2w '"') pure (doffRes + 1) @@ -362,7 +421,7 @@ pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = in (# s1, dstFinal #) c2w :: Char -> Word8 -{-# inline c2w #-} +{-# INLINE c2w #-} c2w = fromIntegral . ord -- Internal. Write two characters in the ASCII plane to a byte array. @@ -372,5 +431,5 @@ write2 marr ix a b = do PM.writeByteArray marr (ix + 1) (c2w b) indexChar8Array :: ByteArray -> Int -> Char -{-# inline indexChar8Array #-} +{-# INLINE indexChar8Array #-} indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i) diff --git a/test/Main.hs b/test/Main.hs index 1b206c7..af18163 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,53 +1,58 @@ -{-# language BangPatterns #-} -{-# language NumericUnderscores #-} -{-# language OverloadedStrings #-} -{-# language QuasiQuotes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} - +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -fno-warn-orphans #-} import Prelude hiding (replicate) +-- liftA2 is needed by GHC 9.4 import Control.Applicative (liftA2) import Control.Monad.ST (runST) import Data.Bytes.Builder -import Data.Bytes.Builder.Template (bldr) -import Data.Bytes.Types (MutableBytes(MutableBytes)) -import Data.Char (ord,chr) -import Data.IORef (IORef,newIORef,readIORef,writeIORef) -import Data.Maybe (fromMaybe) -import Data.Primitive (ByteArray) -import Data.Primitive (PrimArray) -import Data.Text.Short (ShortText) -import Data.WideWord (Word128(Word128),Word256(Word256)) +import Data.Bytes.Types (MutableBytes (MutableBytes)) +import Data.Char (chr, ord) +import Data.IORef (IORef, newIORef, readIORef, writeIORef) +import Data.Primitive (ByteArray, PrimArray) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) import Data.Word import Numeric.Natural (Natural) -import Test.QuickCheck ((===),Arbitrary) +import Test.QuickCheck (Arbitrary, (===)) import Test.QuickCheck.Instances.Natural () -import Test.Tasty (defaultMain,testGroup,TestTree) +import Test.Tasty (TestTree, defaultMain, testGroup) import Test.Tasty.HUnit ((@=?)) import Text.Printf (printf) +#ifdef QUOTER +import Data.Bytes.Builder.Template (bldr) +import Data.Maybe (fromMaybe) +import Data.Text.Short (ShortText) +#endif + import qualified Arithmetic.Nat as Nat import qualified Data.Bits as Bits -import qualified Data.Bytes as Bytes -import qualified Data.Bytes.Builder as Builder -import qualified Data.Bytes.Builder.Bounded as Bounded -import qualified Data.Bytes.Chunks as Chunks -import qualified Data.Bytes.Text.Ascii as Ascii -import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.ByteString as ByteString import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Data.Bytes as Bytes +import qualified Data.Bytes.Builder.Bounded as Bounded +import qualified Data.Bytes.Chunks as Chunks +import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.List as L import qualified Data.Primitive as PM import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified GHC.Exts as Exts -import qualified Prelude import qualified Test.Tasty.HUnit as THU import qualified Test.Tasty.QuickCheck as TQC +import qualified Prelude + +#ifdef QUOTER +import qualified Data.Bytes.Builder as Builder +import qualified Data.Bytes.Text.Ascii as Ascii +#endif import qualified HexWord64 import qualified Word16Tree @@ -56,299 +61,311 @@ main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "Tests" - [ testGroup "live" - [ TQC.testProperty "word64Dec" $ \w -> - runConcat 1 (word64Dec w) === pack (show w) - , TQC.testProperty "word64Dec-x3" $ \x y z -> - runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) - === - pack (show x ++ show y ++ show z) - , TQC.testProperty "int64Dec-x3" $ \x y z -> - runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) - === - pack (show x ++ show y ++ show z) - , TQC.testProperty "word64BE-x3" $ \x y z -> - runConcat 1 (word64BE x <> word64BE y <> word64BE z) - === - pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) - , TQC.testProperty "word256PaddedLowerHex" $ \w -> - Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) - === - pack (showWord256PaddedLowerHex w) - , TQC.testProperty "word128PaddedUpperHex" $ \w -> - Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) - === - pack (showWord128PaddedUpperHex w) - , TQC.testProperty "word64PaddedUpperHex" $ \w -> - runConcat 1 (word64PaddedUpperHex w) - === - pack (showWord64PaddedUpperHex w) - , TQC.testProperty "word16PaddedLowerHex" $ \w -> - runConcat 1 (word16PaddedLowerHex w) - === - pack (showWord16PaddedLowerHex w) - , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0,99)) $ \w -> - Bounded.run Nat.two (Bounded.wordPaddedDec2 w) - === - pack (zeroPadL 2 (show w)) - , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0,9999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) - === - pack (zeroPadL 4 (show w)) - , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0,999999999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) - === - pack (zeroPadL 9 (show w)) - , TQC.testProperty "word8Dec" $ \w -> - runConcat 1 (word8Dec w) - === - pack (show w) - , TQC.testProperty "consLength32BE" $ \w -> - runConcat 1 (consLength32BE (word8Dec w)) - === - pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) - , TQC.testProperty "consLength64BE-uni" $ \w -> - pack - ( '\x00' : '\x00' : '\x00' : '\x00' - : '\x00' : '\x00' : '\x00' : chr (L.length (show w)) - : show w - ) - === - runConcat 1 (consLength64BE (word16Dec w)) - , TQC.testProperty "consLength64BE-multi" $ \w -> - pack - ( '\x00' : '\x00' : '\x00' : '\x00' - : '\x00' : '\x00' : '\x00' : chr (1 + L.length (show w)) - : '\x42' : show w - ) - === - runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) - , THU.testCase "stringUtf8" $ - packUtf8 "¿Cómo estás? I am doing well." @=? - runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") - , THU.testCase "doubleDec-A" $ - pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) - , THU.testCase "doubleDec-B" $ - pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) - , THU.testCase "doubleDec-C" $ - pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) - , THU.testCase "doubleDec-D" $ - pack ("-42") @=? runConcat 1 (doubleDec (-42)) - , THU.testCase "doubleDec-E" $ - AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) - , THU.testCase "doubleDec-F" $ - pack ("42") @=? runConcat 1 (doubleDec 42) - , THU.testCase "doubleDec-G" $ - pack ("0") @=? runConcat 1 (doubleDec 0) - , THU.testCase "doubleDec-H" $ - pack ("0.5") @=? runConcat 1 (doubleDec 0.5) - , THU.testCase "doubleDec-I" $ - pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) - , THU.testCase "doubleDec-J" $ - pack ("999999999") @=? runConcat 1 (doubleDec 999999999) - , THU.testCase "doubleDec-K" $ - pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) - , THU.testCase "doubleDec-L" $ - AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) - , THU.testCase "doubleDec-M" $ - AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) - , THU.testCase "doubleDec-N" $ - AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) - , THU.testCase "doubleDec-O" $ - AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) - , THU.testCase "doubleDec-P" $ - AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) - , THU.testCase "doubleDec-Q" $ - AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) - , THU.testCase "shortTextJsonString-A" $ - pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") - , THU.testCase "shortTextJsonString-B" $ - pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") - , THU.testCase "shortTextJsonString-C" $ - pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") - , THU.testCase "shortTextJsonString-D" $ - pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") - , THU.testCase "word-16-tree" $ - Word16Tree.expectedSmall @=? runConcat 1 - (Word16Tree.encode Word16Tree.exampleSmall) - , THU.testCase "byteArray-small" $ - let a = replicateByte 3 0x50 - b = replicateByte 5 0x51 - in mconcat [a,b] @=? runConcat 1 - ( byteArray a <> byteArray b ) - , THU.testCase "byteArray-big" $ - let a = replicateByte 2105 0x50 - b = replicateByte 725 0x51 - c = replicateByte 900 0x52 - d = replicateByte 800 0x53 - e = replicateByte 700 0x54 - f = replicateByte 950 0x55 - g = replicateByte 975 0x56 - h = replicateByte 3000 0x57 - i = replicateByte 125 0x58 - in mconcat [a,b,c,d,e,f,g,h,i] @=? runConcat 1 - ( byteArray a <> byteArray b <> byteArray c <> - byteArray d <> byteArray e <> byteArray f <> - byteArray g <> byteArray h <> byteArray i +tests = + testGroup + "Tests" + [ testGroup + "live" + [ TQC.testProperty "word64Dec" $ \w -> + runConcat 1 (word64Dec w) === pack (show w) + , TQC.testProperty "word64Dec-x3" $ \x y z -> + runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) + === pack (show x ++ show y ++ show z) + , TQC.testProperty "int64Dec-x3" $ \x y z -> + runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) + === pack (show x ++ show y ++ show z) + , TQC.testProperty "word64BE-x3" $ \x y z -> + runConcat 1 (word64BE x <> word64BE y <> word64BE z) + === pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) + , TQC.testProperty "word256PaddedLowerHex" $ \w -> + Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) + === pack (showWord256PaddedLowerHex w) + , TQC.testProperty "word128PaddedUpperHex" $ \w -> + Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) + === pack (showWord128PaddedUpperHex w) + , TQC.testProperty "word64PaddedUpperHex" $ \w -> + runConcat 1 (word64PaddedUpperHex w) + === pack (showWord64PaddedUpperHex w) + , TQC.testProperty "word16PaddedLowerHex" $ \w -> + runConcat 1 (word16PaddedLowerHex w) + === pack (showWord16PaddedLowerHex w) + , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0, 99)) $ \w -> + Bounded.run Nat.two (Bounded.wordPaddedDec2 w) + === pack (zeroPadL 2 (show w)) + , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0, 9999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) + === pack (zeroPadL 4 (show w)) + , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0, 999999999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) + === pack (zeroPadL 9 (show w)) + , TQC.testProperty "word8Dec" $ \w -> + runConcat 1 (word8Dec w) + === pack (show w) + , TQC.testProperty "consLength32BE" $ \w -> + runConcat 1 (consLength32BE (word8Dec w)) + === pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) + , TQC.testProperty "consLength64BE-uni" $ \w -> + pack + ( '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : chr (L.length (show w)) + : show w ) - , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16LE xs) - === - runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16BE xs) - === - runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32LE xs) - === - runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32BE xs) - === - runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64LE xs) - === - runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64BE xs) - === - runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128LE xs) - === - runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128BE xs) - === - runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256LE xs) - === - runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256BE xs) - === - runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> - runConcat 1 (word64Vlq x) - === - naiveVlq (fromIntegral x) - , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> - runConcat 1 (word64LEB128 x) - === - naiveLeb128 (fromIntegral x) - , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> - runConcat 1 (naturalDec x) - === - pack (show x) - , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> - let y = 1234567892345678934678987654321 * x in - runConcat 1 (naturalDec y) - === - pack (show y) - , testGroup "leb128-encoding" - [ THU.testCase "16" $ - Chunks.concat (run 16 (word64LEB128 16)) - @=? - Latin1.fromString "\x10" - , THU.testCase "1000000" $ - Chunks.concat (run 16 (word64LEB128 1000000)) - @=? - Exts.fromList [0xc0,0x84,0x3d] - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] - , testGroup "seven/eight encoding" - [ THU.testCase "deadbeef" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightRight inp) - @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] + === runConcat 1 (consLength64BE (word16Dec w)) + , TQC.testProperty "consLength64BE-multi" $ \w -> + pack + ( '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : chr (1 + L.length (show w)) + : '\x42' + : show w + ) + === runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) + , THU.testCase "stringUtf8" $ + packUtf8 "¿Cómo estás? I am doing well." + @=? runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") + , THU.testCase "doubleDec-A" $ + pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) + , THU.testCase "doubleDec-B" $ + pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) + , THU.testCase "doubleDec-C" $ + pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) + , THU.testCase "doubleDec-D" $ + pack ("-42") @=? runConcat 1 (doubleDec (-42)) + , THU.testCase "doubleDec-E" $ + AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) + , THU.testCase "doubleDec-F" $ + pack ("42") @=? runConcat 1 (doubleDec 42) + , THU.testCase "doubleDec-G" $ + pack ("0") @=? runConcat 1 (doubleDec 0) + , THU.testCase "doubleDec-H" $ + pack ("0.5") @=? runConcat 1 (doubleDec 0.5) + , THU.testCase "doubleDec-I" $ + pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) + , THU.testCase "doubleDec-J" $ + pack ("999999999") @=? runConcat 1 (doubleDec 999999999) + , THU.testCase "doubleDec-K" $ + pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) + , THU.testCase "doubleDec-L" $ + AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) + , THU.testCase "doubleDec-M" $ + AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) + , THU.testCase "doubleDec-N" $ + AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) + , THU.testCase "doubleDec-O" $ + AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) + , THU.testCase "doubleDec-P" $ + AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) + , THU.testCase "doubleDec-Q" $ + AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) + , THU.testCase "shortTextJsonString-A" $ + pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") + , THU.testCase "shortTextJsonString-B" $ + pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") + , THU.testCase "shortTextJsonString-C" $ + pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") + , THU.testCase "shortTextJsonString-D" $ + pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") + , THU.testCase "word-16-tree" $ + Word16Tree.expectedSmall + @=? runConcat + 1 + (Word16Tree.encode Word16Tree.exampleSmall) + , THU.testCase "byteArray-small" $ + let a = replicateByte 3 0x50 + b = replicateByte 5 0x51 + in mconcat [a, b] + @=? runConcat + 1 + (byteArray a <> byteArray b) + , THU.testCase "byteArray-big" $ + let a = replicateByte 2105 0x50 + b = replicateByte 725 0x51 + c = replicateByte 900 0x52 + d = replicateByte 800 0x53 + e = replicateByte 700 0x54 + f = replicateByte 950 0x55 + g = replicateByte 975 0x56 + h = replicateByte 3000 0x57 + i = replicateByte 125 0x58 + in mconcat [a, b, c, d, e, f, g, h, i] + @=? runConcat + 1 + ( byteArray a + <> byteArray b + <> byteArray c + <> byteArray d + <> byteArray e + <> byteArray f + <> byteArray g + <> byteArray h + <> byteArray i + ) + , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16LE xs) + === runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16BE xs) + === runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32LE xs) + === runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32BE xs) + === runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64LE xs) + === runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64BE xs) + === runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128LE xs) + === runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128BE xs) + === runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256LE xs) + === runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256BE xs) + === runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> + runConcat 1 (word64Vlq x) + === naiveVlq (fromIntegral x) + , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> + runConcat 1 (word64LEB128 x) + === naiveLeb128 (fromIntegral x) + , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> + runConcat 1 (naturalDec x) + === pack (show x) + , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> + let y = 1234567892345678934678987654321 * x + in runConcat 1 (naturalDec y) + === pack (show y) + , testGroup + "leb128-encoding" + [ THU.testCase "16" $ + Chunks.concat (run 16 (word64LEB128 16)) + @=? Latin1.fromString "\x10" + , THU.testCase "1000000" $ + Chunks.concat (run 16 (word64LEB128 1000000)) + @=? Exts.fromList [0xc0, 0x84, 0x3d] + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] + , testGroup + "seven/eight encoding" + [ THU.testCase "deadbeef" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightRight inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] + ] + , testGroup + "alternate" + [ TQC.testProperty "HexWord64" $ \x y -> + runConcat + 1 + ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) + <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) + ) + === pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) + ] + , testGroup + "putMany" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putMany 7 ascii txt (bytesOntoRef ref) + res <- readIORef ref + id $ + [ map c2w "hello_" + , map c2w "world_" + , map c2w "are_yo" + , map c2w "u_list" + , map c2w "ening" + ] + @=? map Exts.toList (Exts.toList res) + ] + , testGroup + "putManyConsLength" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putManyConsLength + Nat.constant + (\n -> Bounded.word16BE (fromIntegral n)) + 16 + ascii + txt + (bytesOntoRef ref) + res <- readIORef ref + id $ + [ 0x00 : 0x0A : map c2w "hello_worl" + , 0x00 : 0x0A : map c2w "d_are_you_" + , 0x00 : 0x09 : map c2w "listening" + ] + @=? map Exts.toList (Exts.toList res) + ] +#ifdef QUOTER + , testGroup + "bytes templates" + [ THU.testCase "A" $ do + let name = Just ("foo" :: ShortText) + msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "Hello foo!\n" @=? msg + , THU.testCase "B" $ do + let one = "foo" :: ShortText + two = "bar" :: String + msgBuilder = [bldr|`one``two`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "foobar" @=? msg + , THU.testCase "C" $ do + let msgBuilder = [bldr|a backtick for you: \`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "a backtick for you: `" @=? msg + , THU.testCase "D" $ do + let i = 137 :: Int + msgBuilder = [bldr|there are `i` lights!|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "there are 137 lights!" @=? msg + ] +#endif ] - , testGroup "alternate" - [ TQC.testProperty "HexWord64" $ \x y -> - runConcat 1 - ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) - <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) - ) - === - pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) - ] - , testGroup "putMany" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putMany 7 ascii txt (bytesOntoRef ref) - res <- readIORef ref - id $ - [ map c2w "hello_" - , map c2w "world_" - , map c2w "are_yo" - , map c2w "u_list" - , map c2w "ening" - ] @=? map Exts.toList (Exts.toList res) - ] - , testGroup "putManyConsLength" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putManyConsLength Nat.constant - (\n -> Bounded.word16BE (fromIntegral n)) - 16 ascii txt (bytesOntoRef ref) - res <- readIORef ref - id $ - [ 0x00 : 0x0A : map c2w "hello_worl" - , 0x00 : 0x0A : map c2w "d_are_you_" - , 0x00 : 0x09 : map c2w "listening" - ] @=? map Exts.toList (Exts.toList res) - ] - , testGroup "bytes templates" - [ THU.testCase "A" $ do - let name = Just ("foo" :: ShortText) - msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "Hello foo!\n" @=? msg - , THU.testCase "B" $ do - let one = "foo" :: ShortText - two = "bar" :: String - msgBuilder = [bldr|`one``two`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "foobar" @=? msg - , THU.testCase "C" $ do - let msgBuilder = [bldr|a backtick for you: \`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "a backtick for you: `" @=? msg - , THU.testCase "D" $ do - let i = 137 :: Int - msgBuilder = [bldr|there are `i` lights!|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "there are 137 lights!" @=? msg - ] - ] bytesOntoRef :: - IORef [PM.ByteArray] - -> MutableBytes Exts.RealWorld - -> IO () + IORef [PM.ByteArray] -> + MutableBytes Exts.RealWorld -> + IO () bytesOntoRef !ref (MutableBytes buf off len) = do rs <- readIORef ref dst <- PM.newByteArray len @@ -391,9 +408,10 @@ newtype AsciiByteArray = AsciiByteArray ByteArray deriving (Eq) instance Show AsciiByteArray where - show (AsciiByteArray b) = if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) - then Latin1.toString (Bytes.fromByteArray b) - else show (show b) + show (AsciiByteArray b) = + if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) + then Latin1.toString (Bytes.fromByteArray b) + else show (show b) instance Arbitrary Word128 where arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary @@ -409,29 +427,30 @@ zeroPadL n s naiveLeb128 :: Natural -> ByteArray naiveLeb128 x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q,r) = quotRem n 128 + let (q, r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r - w = if q == 0 - then r' - else Bits.setBit r' 7 + w = + if q == 0 + then r' + else Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then L.reverse xs' else go xs' q naiveVlq :: Natural -> ByteArray naiveVlq x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q,r) = quotRem n 128 + let (q, r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r w = case xs of [] -> r' _ -> Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then xs' else go xs' q