diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index f6c0b22..0000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml deleted file mode 100644 index f3f5be4..0000000 --- a/.github/workflows/build.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: build -on: - pull_request: - branches: - - "*" - -jobs: - call-workflow: - uses: byteverse/.github/.github/workflows/build-matrix.yaml@main - with: - cabal-file: bytebuild.cabal diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index 9411962..0000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: release -on: - push: - tags: - - "*" - -jobs: - call-workflow: - uses: byteverse/.github/.github/workflows/release.yaml@main - secrets: inherit diff --git a/.gitignore b/.gitignore index ccd94e8..28d589b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -.vscode/ dist dist-* cabal-dev @@ -12,7 +11,6 @@ cabal-dev .hsenv .cabal-sandbox/ cabal.sandbox.config -cabal.project.local *.prof *.aux *.hp diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f03773..23e6d08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,28 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named `small-bytearray-builder` is now just a compatibility shim to ease the migration process. -## 0.3.16.2 -- 2024-02-06 - -* Restore import statement for `liftA2` to fix build for GHC 9.4. - -## 0.3.16.1 -- 2024-02-02 - -* Remove all CPP -* Drop support for GHC < 9.4 -* Drop support for text < 2.0 - -## 0.3.16.0 -- 2024-01-29 - -* Add `wordPaddedDec3`. -* Add `Data.Bytes.Builder.Avro`. -* Add `word16LEB128`. -* Stop accepting versions of text lower than 2.0. - -## 0.3.15.0 -- 2024-01-05 +## 0.3.15.0 -- 2023-??-?? * Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need to perform JSON string encoding without using a builder. -* Add `Data.Bytes.Builder.textJsonString` when building with text 2.0+ ## 0.3.14.0 -- 2023-07-20 @@ -145,7 +127,7 @@ to ease the migration process. * Add `flush`, `copy`, and `insert` for better control when converting byte sequences to builders. * Add `shortByteString` to improve interoperability with the - `bytestring` library. + `bytestring` library. ## 0.2.1.0 -- 2019-09-05 diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/bench/Cell.hs b/bench/Cell.hs index 3c76016..77d4f61 100644 --- a/bench/Cell.hs +++ b/bench/Cell.hs @@ -1,14 +1,14 @@ -{-# LANGUAGE OverloadedLists #-} -{-# LANGUAGE OverloadedStrings #-} +{-# language OverloadedLists #-} +{-# language OverloadedStrings #-} module Cell - ( Cell (..) + ( Cell(..) , cells ) where -import Data.Primitive (SmallArray) -import Data.Text.Short (ShortText) import Data.Word (Word32) +import Data.Text.Short (ShortText) +import Data.Primitive (SmallArray) -- A cell in a CSV file data Cell @@ -18,14 +18,15 @@ data Cell -- Some sample data to encode as a CSV cells :: SmallArray (SmallArray Cell) cells = - [ [CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343] - , [CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453] - , [CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634] - , [CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922] - , [CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617] - , [CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150] - , [CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299] - , [CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214] - , [CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918] - , [CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904] + [ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ] + , [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ] + , [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ] + , [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ] + , [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ] + , [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ] + , [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ] + , [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ] + , [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ] + , [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ] ] + diff --git a/bench/Main.hs b/bench/Main.hs index 1c93f57..cc29af6 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -1,8 +1,9 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# language LambdaCase #-} +{-# language OverloadedStrings #-} import Data.Primitive (ByteArray) import Data.Word (Word64) -import Gauge (bench, bgroup, whnf) +import Gauge (bgroup,bench,whnf) import Gauge.Main (defaultMain) import qualified Arithmetic.Nat as Nat @@ -10,87 +11,70 @@ import qualified Data.Bytes.Builder as B import qualified Data.Bytes.Builder.Bounded as U import qualified Cell -import qualified HexWord64 import qualified SimpleCsv +import qualified HexWord64 import qualified Word16Tree main :: IO () -main = - defaultMain - [ bgroup - "w64" - [ bgroup - "hex" - [ bench "library" (whnf encodeHexWord64s w64s) - , bench "loop" (whnf encodeHexWord64sLoop w64s) - ] - ] - , bgroup - "unbounded" - [ bench "csv-no-escape" $ - whnf - (\x -> B.run 4080 (SimpleCsv.encodeRows x)) - Cell.cells - , bench "word-16-tree-small" $ - whnf - (\x -> B.run 4080 (Word16Tree.encode x)) - Word16Tree.exampleSmall - , bench "word-16-tree-2000" $ - whnf - (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) - Word16Tree.example2000 - , bench "word-16-tree-9000" $ - whnf - (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) - Word16Tree.example9000 - ] +main = defaultMain + [ bgroup "w64" + [ bgroup "hex" + [ bench "library" (whnf encodeHexWord64s w64s) + , bench "loop" (whnf encodeHexWord64sLoop w64s) + ] ] + , bgroup "unbounded" + [ bench "csv-no-escape" $ whnf + (\x -> B.run 4080 (SimpleCsv.encodeRows x)) + Cell.cells + , bench "word-16-tree-small" $ whnf + (\x -> B.run 4080 (Word16Tree.encode x)) + Word16Tree.exampleSmall + , bench "word-16-tree-2000" $ whnf + (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) + Word16Tree.example2000 + , bench "word-16-tree-9000" $ whnf + (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) + Word16Tree.example9000 + ] + ] w64s :: Word64s -w64s = - Word64s - 0xde2b8a480cf77113 - 0x48f1668ca2a68b45 - 0xd262fbaa0b2f473c - 0xbab20547f4919d9f - 0xb7ec16121704db43 - 0x9c259f5bfa90e1eb - 0xd451eca11d9873ad - 0xbd927e8d4c879d02 +w64s = Word64s + 0xde2b8a480cf77113 + 0x48f1668ca2a68b45 + 0xd262fbaa0b2f473c + 0xbab20547f4919d9f + 0xb7ec16121704db43 + 0x9c259f5bfa90e1eb + 0xd451eca11d9873ad + 0xbd927e8d4c879d02 -data Word64s - = Word64s - !Word64 - !Word64 - !Word64 - !Word64 - !Word64 - !Word64 - !Word64 - !Word64 +data Word64s = Word64s + !Word64 !Word64 !Word64 !Word64 + !Word64 !Word64 !Word64 !Word64 encodeHexWord64s :: Word64s -> ByteArray -{-# NOINLINE encodeHexWord64s #-} -encodeHexWord64s (Word64s a b c d e f g h) = - U.run Nat.constant $ - U.word64PaddedUpperHex a - `U.append` U.word64PaddedUpperHex b - `U.append` U.word64PaddedUpperHex c - `U.append` U.word64PaddedUpperHex d - `U.append` U.word64PaddedUpperHex e - `U.append` U.word64PaddedUpperHex f - `U.append` U.word64PaddedUpperHex g - `U.append` U.word64PaddedUpperHex h +{-# noinline encodeHexWord64s #-} +encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $ + U.word64PaddedUpperHex a `U.append` + U.word64PaddedUpperHex b `U.append` + U.word64PaddedUpperHex c `U.append` + U.word64PaddedUpperHex d `U.append` + U.word64PaddedUpperHex e `U.append` + U.word64PaddedUpperHex f `U.append` + U.word64PaddedUpperHex g `U.append` + U.word64PaddedUpperHex h encodeHexWord64sLoop :: Word64s -> ByteArray -{-# NOINLINE encodeHexWord64sLoop #-} -encodeHexWord64sLoop (Word64s a b c d e f g h) = - U.run Nat.constant $ - HexWord64.word64PaddedUpperHex a - `U.append` HexWord64.word64PaddedUpperHex b - `U.append` HexWord64.word64PaddedUpperHex c - `U.append` HexWord64.word64PaddedUpperHex d - `U.append` HexWord64.word64PaddedUpperHex e - `U.append` HexWord64.word64PaddedUpperHex f - `U.append` HexWord64.word64PaddedUpperHex g - `U.append` HexWord64.word64PaddedUpperHex h +{-# noinline encodeHexWord64sLoop #-} +encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $ + HexWord64.word64PaddedUpperHex a `U.append` + HexWord64.word64PaddedUpperHex b `U.append` + HexWord64.word64PaddedUpperHex c `U.append` + HexWord64.word64PaddedUpperHex d `U.append` + HexWord64.word64PaddedUpperHex e `U.append` + HexWord64.word64PaddedUpperHex f `U.append` + HexWord64.word64PaddedUpperHex g `U.append` + HexWord64.word64PaddedUpperHex h + diff --git a/bench/SimpleCsv.hs b/bench/SimpleCsv.hs index 34a2ded..1b47345 100644 --- a/bench/SimpleCsv.hs +++ b/bench/SimpleCsv.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE LambdaCase #-} +{-# language LambdaCase #-} -- A variant of CSV encoding that does not perform -- any escaping or quoting. This is in its own module @@ -8,24 +8,22 @@ module SimpleCsv ( encodeRows ) where -import Cell (Cell (..)) +import Cell (Cell(..)) import Data.Primitive (SmallArray) -import qualified Data.Bytes.Builder as B import qualified Data.Foldable as F +import qualified Data.Bytes.Builder as B encodeRows :: SmallArray (SmallArray Cell) -> B.Builder -encodeRows = - F.foldr - (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) - mempty +encodeRows = F.foldr + (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) + mempty encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder -encodeSimpleCsvRow cs b = - F.foldr - (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) - b - cs +encodeSimpleCsvRow cs b = F.foldr + (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) + b + cs encodeSimpleCsvCell :: Cell -> B.Builder encodeSimpleCsvCell = \case diff --git a/bytebuild.cabal b/bytebuild.cabal index ffb6cd7..309042d 100644 --- a/bytebuild.cabal +++ b/bytebuild.cabal @@ -1,17 +1,17 @@ -cabal-version: 2.2 -name: bytebuild -version: 0.3.16.2 -synopsis: Build byte arrays +cabal-version: 2.2 +name: bytebuild +version: 0.3.15.0 +synopsis: Build byte arrays description: This is similar to the builder facilities provided by `Data.ByteString.Builder`. It is intended to be used in situations where the following apply: . * An individual entity will be serialized as a small - number of bytes (less than 512). + number of bytes (less than 512). . * A large number (more than 32) of entities will be serialized - one after another without anything between them. + one after another without anything between them. . Unlike builders from the `bytestring` package, these builders do not track their state when they run out of space. A builder @@ -19,136 +19,109 @@ description: of the next chunk. This strategy for building is suitable for most CSVs and several line protocols (carbon, InfluxDB, etc.). -homepage: https://github.com/byteverse/bytebuild -bug-reports: https://github.com/byteverse/bytebuild/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: amartin@layer3com.com -copyright: 2019 Andrew Martin -category: Data -extra-doc-files: CHANGELOG.md -tested-with: GHC ==9.4.8 || ==9.6.5 || ==9.8.2 || ==9.10.1 - -common build-settings - default-language: Haskell2010 - ghc-options: -Wall -Wunused-packages - -flag quoter - manual: True - description: Include the quasiquoter - default: True +homepage: https://github.com/byteverse/bytebuild +bug-reports: https://github.com/byteverse/bytebuild/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: andrew.thaddeus@gmail.com +copyright: 2019 Andrew Martin +category: Data +extra-source-files: CHANGELOG.md flag checked - manual: True + manual: True description: Add bounds-checking to primitive array operations - default: False + default: False library - import: build-settings exposed-modules: Data.Bytes.Builder - Data.Bytes.Builder.Avro + Data.Bytes.Builder.Class + Data.Bytes.Builder.Template + Data.Bytes.Builder.Unsafe Data.Bytes.Builder.Bounded Data.Bytes.Builder.Bounded.Class Data.Bytes.Builder.Bounded.Unsafe - Data.Bytes.Builder.Class - Data.Bytes.Builder.Unsafe - other-modules: Compat Op - - reexported-modules: Data.Bytes.Chunks + reexported-modules: + Data.Bytes.Chunks build-depends: - , base >=4.17.0.0 && <4.21 - , byteslice >=0.2.6 && <0.3 - , bytestring >=0.10.8.2 && <0.13 - , integer-logarithms >=1.0.3 && <1.1 - , natural-arithmetic >=0.1 && <0.3 - , primitive-offset >=0.2 && <0.3 - , run-st >=0.1.2 && <0.2 - , text >=2.0 && <2.2 - , text-short >=0.1.3 && <0.2 - , wide-word >=0.1.0.9 && <0.2 + , base >=4.12.0.0 && <4.19 + , byteslice >=0.2.6 && <0.3 + , bytestring >=0.10.8.2 && <0.12 + , haskell-src-meta >=0.8 + , integer-logarithms >=1.0.3 && <1.1 + , natural-arithmetic >=0.1 && <0.3 + , primitive-offset >=0.2 && <0.3 + , run-st >=0.1.2 && <0.2 + , template-haskell >=2.16 + , text >=1.2 && <2.2 + , text-short >=0.1.3 && <0.2 + , wide-word >=0.1.0.9 && <0.2 , zigzag - - if flag(quoter) - build-depends: - haskell-src-meta >=0.8.13 - , template-haskell >=2.16 - exposed-modules: Data.Bytes.Builder.Template - - if impl(ghc >=9.2) + if impl(ghc >= 9.2) hs-source-dirs: src-9.2 - else - if impl(ghc >=8.10) + if impl(ghc >= 8.10) hs-source-dirs: src-9.0 - if flag(checked) - build-depends: primitive-checked >=0.7 && <0.10 + build-depends: primitive-checked >= 0.7 && <0.10 hs-source-dirs: src-checked - else - build-depends: primitive >=0.7 && <0.10 + build-depends: primitive >= 0.7 && <0.10 hs-source-dirs: src-unchecked - - ghc-options: -O2 - hs-source-dirs: src - c-sources: cbits/bytebuild_custom.c + ghc-options: -Wall -O2 + hs-source-dirs: src + default-language: Haskell2010 + c-sources: cbits/bytebuild_custom.c test-suite test - import: build-settings - type: exitcode-stdio-1.0 - hs-source-dirs: test common - main-is: Main.hs + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: test, common + main-is: Main.hs + ghc-options: -O2 -Wall other-modules: HexWord64 Word16Tree - build-depends: - , base >=4.12.0.0 && <5 + , QuickCheck >=2.13.1 && <2.15 + , base >=4.12.0.0 && <5 , bytebuild , byteslice , bytestring , natural-arithmetic , primitive - , QuickCheck >=2.13.1 && <2.16 - , quickcheck-instances >=0.3.22 - , tasty >=1.2.3 && <1.6 - , tasty-hunit >=0.10.0.2 && <0.11 - , tasty-quickcheck >=0.10.1 && <0.11 - , text >=2.0 && <2.2 - , wide-word >=0.1.0.9 && <0.2 - - default-extensions: CPP - - if flag(quoter) - cpp-options: -DQUOTER - build-depends: text-short + , primitive-unlifted >=0.1.2 + , quickcheck-classes >=0.6.4 + , quickcheck-instances >=0.3.22 + , text-short + , tasty >=1.2.3 && <1.6 + , tasty-hunit >=0.10.0.2 && <0.11 + , tasty-quickcheck >=0.10.1 && <0.11 + , text >=1.2 && <2.2 + , vector + , wide-word >=0.1.0.9 && <0.2 benchmark bench - import: build-settings - type: exitcode-stdio-1.0 + type: exitcode-stdio-1.0 build-depends: , base , bytebuild - , byteslice - , gauge >=0.2.4 + , gauge >= 0.2.4 , natural-arithmetic , primitive , text-short - - ghc-options: -O2 - hs-source-dirs: bench common - main-is: Main.hs + , byteslice + ghc-options: -Wall -O2 + default-language: Haskell2010 + hs-source-dirs: bench, common + main-is: Main.hs other-modules: Cell HexWord64 SimpleCsv Word16Tree - -source-repository head - type: git - location: git://github.com/byteverse/bytebuild.git diff --git a/common/HexWord64.hs b/common/HexWord64.hs index 23acfe4..b3af3c8 100644 --- a/common/HexWord64.hs +++ b/common/HexWord64.hs @@ -1,10 +1,10 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE PolyKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE UnboxedTuples #-} +{-# language BangPatterns #-} +{-# language ScopedTypeVariables #-} +{-# language DataKinds #-} +{-# language UnboxedTuples #-} +{-# language MagicHash #-} +{-# language PolyKinds #-} +{-# language TypeApplications #-} module HexWord64 ( word64PaddedUpperHex @@ -15,37 +15,34 @@ module HexWord64 -- the hoop jumping, the explicit loop used here is still outperformed -- by just inlining the loop. +import GHC.ST (ST(ST)) import Data.Bits -import Data.Bytes.Builder.Bounded.Unsafe (Builder, construct) +import Data.Bytes.Builder.Bounded.Unsafe (Builder,construct) import Data.Primitive import Data.Word import GHC.Exts -import GHC.ST (ST (ST)) import qualified Control.Monad.Primitive as PM type ST# s (a :: TYPE (r :: RuntimeRep)) = State# s -> (# State# s, a #) word64PaddedUpperHex :: Word64 -> Builder 16 -word64PaddedUpperHex w = construct $ \a b -> - ST - ( \s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of - (# s1, i #) -> (# s1, I# i #) - ) +word64PaddedUpperHex w = construct $ \a b -> ST + (\s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of + (# s1, i #) -> (# s1, I# i #) + ) word64PaddedUpperHexLoop :: forall s. Word64 -> Int -> MutableByteArray s -> Int -> ST# s Int# -word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = - if shiftAmount >= 0 - then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of - (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 - else (# s0, i# #) +word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = if shiftAmount >= 0 + then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of + (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 + else (# s0, i# #) toHexUpper :: Word64 -> Word8 -toHexUpper w' = - fromIntegral $ - (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = fromIntegral + $ (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 diff --git a/common/Word16Tree.hs b/common/Word16Tree.hs index a07d056..2163a5c 100644 --- a/common/Word16Tree.hs +++ b/common/Word16Tree.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns #-} +{-# language BangPatterns #-} module Word16Tree ( Word16Tree @@ -9,11 +9,11 @@ module Word16Tree , expectedSmall ) where -import qualified Data.Bytes as Bytes import Data.Bytes.Builder as B -import qualified Data.Bytes.Text.Ascii -import Data.Primitive (ByteArray) import Data.Word (Word16) +import Data.Primitive (ByteArray) +import qualified Data.Bytes as Bytes +import qualified Data.Bytes.Text.Ascii data Word16Tree = Branch !Word16Tree !Word16Tree @@ -23,62 +23,63 @@ encode :: Word16Tree -> Builder encode (Leaf w) = B.word16PaddedUpperHex w encode (Branch a b) = B.ascii '(' - <> encode a - <> B.ascii ',' - <> encode b - <> B.ascii ')' + <> + encode a + <> + B.ascii ',' + <> + encode b + <> + B.ascii ')' expectedSmall :: ByteArray -expectedSmall = - Bytes.toByteArray $ - Data.Bytes.Text.Ascii.fromString - "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" +expectedSmall = Bytes.toByteArray $ Data.Bytes.Text.Ascii.fromString + "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" + exampleSmall :: Word16Tree -exampleSmall = - Branch - ( Branch - (Leaf 0xAB59) - ( Branch - (Leaf 0x1F33) - (Leaf 0x2E71) - ) +exampleSmall = Branch + (Branch + (Leaf 0xAB59) + (Branch + (Leaf 0x1F33) + (Leaf 0x2E71) ) - ( Branch - ( Branch - ( Branch - ( Branch - (Leaf 0xFA9A) - (Leaf 0x247B) - ) - (Leaf 0x890C) - ) - ( Branch - (Leaf 0x0F13) - ( Branch - ( Branch - (Leaf 0x55BF) - (Leaf 0x7CF1) - ) - (Leaf 0x389B) - ) - ) + ) + (Branch + (Branch + (Branch + (Branch + (Leaf 0xFA9A) + (Leaf 0x247B) ) - (Leaf 0x1205) + (Leaf 0x890C) + ) + (Branch + (Leaf 0x0F13) + (Branch + (Branch + (Leaf 0x55BF) + (Leaf 0x7CF1) + ) + (Leaf 0x389B) + ) + ) ) + (Leaf 0x1205) + ) example2000 :: Word16Tree -{-# NOINLINE example2000 #-} +{-# noinline example2000 #-} example2000 = balanced 0 2000 example9000 :: Word16Tree -{-# NOINLINE example9000 #-} +{-# noinline example9000 #-} example9000 = balanced 0 9000 balanced :: Word16 -> Word16 -> Word16Tree balanced !off !n | n == 0 = Leaf off | n == 1 = Leaf (off + 1) - | otherwise = - let x = div n 2 - in Branch (balanced off x) (balanced (off + x) (n - x)) + | otherwise = let x = div n 2 in + Branch (balanced off x) (balanced (off + x) (n - x)) diff --git a/fourmolu.yaml b/fourmolu.yaml deleted file mode 100644 index 40cd005..0000000 --- a/fourmolu.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Number of spaces per indentation step -indentation: 2 - -# Max line length for automatic line breaking -column-limit: 200 - -# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) -function-arrows: trailing - -# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) -comma-style: leading - -# Styling of import/export lists (choices: leading, trailing, or diff-friendly) -import-export-style: leading - -# Whether to full-indent or half-indent 'where' bindings past the preceding body -indent-wheres: false - -# Whether to leave a space before an opening record brace -record-brace-space: true - -# Number of spaces between top-level declarations -newlines-between-decls: 1 - -# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) -haddock-style: multi-line - -# How to print module docstring -haddock-style-module: null - -# Styling of let blocks (choices: auto, inline, newline, or mixed) -let-style: auto - -# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) -in-style: right-align - -# Whether to put parentheses around a single constraint (choices: auto, always, or never) -single-constraint-parens: always - -# Output Unicode syntax (choices: detect, always, or never) -unicode: never - -# Give the programmer more choice on where to insert blank lines -respectful: true - -# Fixity information for operators -fixities: [] - -# Module reexports Fourmolu should know about -reexports: [] - diff --git a/src-9.0/Compat.hs b/src-9.0/Compat.hs index bf82334..b7f978a 100644 --- a/src-9.0/Compat.hs +++ b/src-9.0/Compat.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE MagicHash #-} +{-# language MagicHash #-} -- This is actually used with both GHC 8.10 and with GHC 9.0. -- The name of the directory is a little misleading. @@ -14,40 +14,40 @@ module Compat , word32ToWord# ) where -import GHC.Exts (Int#, Word#) +import GHC.Exts (Int#,Word#) int8ToInt# :: Int# -> Int# -{-# INLINE int8ToInt# #-} +{-# inline int8ToInt# #-} int8ToInt# x = x int16ToInt# :: Int# -> Int# -{-# INLINE int16ToInt# #-} +{-# inline int16ToInt# #-} int16ToInt# x = x int32ToInt# :: Int# -> Int# -{-# INLINE int32ToInt# #-} +{-# inline int32ToInt# #-} int32ToInt# x = x wordToWord8# :: Word# -> Word# -{-# INLINE wordToWord8# #-} -wordToWord8# x = x +{-# inline wordToWord8# #-} +wordToWord8# x = x wordToWord16# :: Word# -> Word# -{-# INLINE wordToWord16# #-} +{-# inline wordToWord16# #-} wordToWord16# x = x wordToWord32# :: Word# -> Word# -{-# INLINE wordToWord32# #-} +{-# inline wordToWord32# #-} wordToWord32# x = x word8ToWord# :: Word# -> Word# -{-# INLINE word8ToWord# #-} +{-# inline word8ToWord# #-} word8ToWord# x = x word16ToWord# :: Word# -> Word# -{-# INLINE word16ToWord# #-} +{-# inline word16ToWord# #-} word16ToWord# x = x word32ToWord# :: Word# -> Word# -{-# INLINE word32ToWord# #-} +{-# inline word32ToWord# #-} word32ToWord# x = x diff --git a/src-9.2/Compat.hs b/src-9.2/Compat.hs index fb5a93b..177c94c 100644 --- a/src-9.2/Compat.hs +++ b/src-9.2/Compat.hs @@ -1,10 +1,10 @@ -{-# LANGUAGE MagicHash #-} +{-# language MagicHash #-} -module Compat +module Compat ( int8ToInt# , int16ToInt# , int32ToInt# - , wordToWord8# + , wordToWord8# , wordToWord16# , wordToWord32# , word8ToWord# diff --git a/src-checked/Op.hs b/src-checked/Op.hs index d41684d..a9b4211 100644 --- a/src-checked/Op.hs +++ b/src-checked/Op.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} +{-# language MagicHash #-} +{-# language UnboxedTuples #-} module Op ( writeCharArray# @@ -7,9 +7,9 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts (ByteArray#, Char#, Int#, MutableByteArray#, State#, (<#), (>=#)) +import GHC.Exts ((<#),(>=#),State#,Int#,MutableByteArray#,ByteArray#,Char#) +import GHC.Int (Int(I#)) import qualified GHC.Exts as Exts -import GHC.Int (Int (I#)) writeCharArray# :: MutableByteArray# s -> Int# -> Char# -> State# s -> State# s writeCharArray# arr i v st = case i <# 0# of @@ -27,8 +27,8 @@ copyByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# sz - , I# soff + I# len <= I# (Exts.sizeofByteArray# src) -> - Exts.copyByteArray# src soff dst doff len s1 + , I# soff + I# len <= I# (Exts.sizeofByteArray# src) + -> Exts.copyByteArray# src soff dst doff len s1 | otherwise -> error "copyByteArray#: index range out of bounds" copyMutableByteArray# :: MutableByteArray# s -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s @@ -40,6 +40,6 @@ copyMutableByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# szDst - , I# soff + I# len <= I# szSrc -> - Exts.copyMutableByteArray# src soff dst doff len s2 + , I# soff + I# len <= I# szSrc + -> Exts.copyMutableByteArray# src soff dst doff len s2 | otherwise -> error "copyMutableByteArray#: index range out of bounds" diff --git a/src-unchecked/Op.hs b/src-unchecked/Op.hs index 9092c35..32874e2 100644 --- a/src-unchecked/Op.hs +++ b/src-unchecked/Op.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE MagicHash #-} +{-# language MagicHash #-} module Op ( writeCharArray# @@ -6,4 +6,4 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts (copyByteArray#, copyMutableByteArray#, writeCharArray#) +import GHC.Exts (copyMutableByteArray#,writeCharArray#,copyByteArray#,copyMutableByteArray#) diff --git a/src/Data/Bytes/Builder.hs b/src/Data/Bytes/Builder.hs index 1a89a4a..7d15de7 100644 --- a/src/Data/Bytes/Builder.hs +++ b/src/Data/Bytes/Builder.hs @@ -1,18 +1,19 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE NumericUnderscores #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE UnboxedTuples #-} +{-# language CPP #-} +{-# language BangPatterns #-} +{-# language DataKinds #-} +{-# language DuplicateRecordFields #-} +{-# language LambdaCase #-} +{-# language MagicHash #-} +{-# language NumericUnderscores #-} +{-# language RankNTypes #-} +{-# language ScopedTypeVariables #-} +{-# language TypeApplications #-} +{-# language UnboxedTuples #-} module Data.Bytes.Builder ( -- * Bounded Primitives Builder , fromBounded - -- * Evaluation , run , runOnto @@ -20,7 +21,6 @@ module Data.Bytes.Builder , reversedOnto , putMany , putManyConsLength - -- * Materialized Byte Sequences , bytes , chunks @@ -30,21 +30,19 @@ module Data.Bytes.Builder , insert , byteArray , shortByteString +#if MIN_VERSION_text(2,0,0) , textUtf8 - , textJsonString +#endif , shortTextUtf8 , shortTextJsonString , cstring , cstring# , cstringLen , stringUtf8 - -- * Byte Sequence Encodings , sevenEightRight , sevenEightSmile - -- * Encode Integral Types - -- ** Human-Readable , word64Dec , word32Dec @@ -58,21 +56,16 @@ module Data.Bytes.Builder , int8Dec , intDec , integerDec - -- * Unsigned Words - -- ** 64-bit , word64PaddedUpperHex - -- ** 32-bit , word32PaddedUpperHex - -- ** 16-bit , word16PaddedUpperHex , word16PaddedLowerHex , word16LowerHex , word16UpperHex - -- ** 8-bit , word8PaddedUpperHex , word8LowerHex @@ -85,12 +78,9 @@ module Data.Bytes.Builder , ascii7 , ascii8 , char - -- ** Machine-Readable - -- *** One , word8 - -- **** Big Endian , word256BE , word128BE @@ -100,7 +90,6 @@ module Data.Bytes.Builder , int64BE , int32BE , int16BE - -- **** Little Endian , word256LE , word128LE @@ -110,24 +99,19 @@ module Data.Bytes.Builder , int64LE , int32LE , int16LE - -- **** LEB128 , intLEB128 , int32LEB128 , int64LEB128 , wordLEB128 - , word16LEB128 , word32LEB128 , word64LEB128 - -- **** VLQ , wordVlq , word32Vlq , word64Vlq - -- *** Many , word8Array - -- **** Big Endian , word16ArrayBE , word32ArrayBE @@ -137,7 +121,6 @@ module Data.Bytes.Builder , int64ArrayBE , int32ArrayBE , int16ArrayBE - -- **** Little Endian , word16ArrayLE , word32ArrayLE @@ -147,67 +130,56 @@ module Data.Bytes.Builder , int64ArrayLE , int32ArrayLE , int16ArrayLE - -- ** Prefixing with Length , consLength , consLength32LE , consLength32BE , consLength64BE - -- * Encode Floating-Point Types - -- ** Human-Readable , doubleDec - -- * Replication , replicate - -- * Control , flush - -- * Rebuild , rebuild ) where import Prelude hiding (replicate) -import Control.Exception (SomeException, toException) -import Control.Monad.IO.Class (MonadIO, liftIO) -import Control.Monad.ST (ST, runST) -import Data.Bits (unsafeShiftL, unsafeShiftR, (.&.), (.|.)) -import Data.ByteString.Short.Internal (ShortByteString (SBS)) -import Data.Bytes.Builder.Unsafe - ( Builder (Builder) - , BuilderState (BuilderState) - , Commits (Immutable, Initial, Mutable) - , addCommitsLength - , commitDistance1 - , commitsOntoChunks - , copyReverseCommits - , cstring - , fromEffect - , pasteIO - , pasteUtf8TextJson# - , reverseCommitsOntoChunks - , stringUtf8 - ) -import Data.Bytes.Chunks (Chunks (ChunksCons, ChunksNil)) -import Data.Bytes.Types (Bytes (Bytes), MutableBytes (MutableBytes)) +import Control.Exception (SomeException,toException) +import Control.Monad.IO.Class (MonadIO,liftIO) +import Control.Monad.ST (ST,runST) +import Data.Bits ((.&.),(.|.),unsafeShiftL,unsafeShiftR) +import Data.Bytes.Builder.Unsafe (addCommitsLength,copyReverseCommits) +import Data.Bytes.Builder.Unsafe (Builder(Builder),commitDistance1) +import Data.Bytes.Builder.Unsafe (BuilderState(BuilderState),pasteIO) +import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable)) +import Data.Bytes.Builder.Unsafe (commitsOntoChunks) +import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks) +import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect) +import Data.Bytes.Builder.Unsafe (pasteUtf8TextJson#) +import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil)) +import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes)) +import Data.ByteString.Short.Internal (ShortByteString(SBS)) import Data.Foldable (foldlM) -import Data.Int (Int16, Int32, Int64, Int8) -import Data.Primitive (ByteArray (..), MutableByteArray (..), PrimArray (..)) +import Data.Int (Int64,Int32,Int16,Int8) +import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..)) import Data.Text.Short (ShortText) -import Data.WideWord (Word128, Word256) -import Data.Word (Word16, Word32, Word64, Word8) -import Data.Word.Zigzag (toZigzag32, toZigzag64, toZigzagNative) +import Data.WideWord (Word128,Word256) +import Data.Word (Word64,Word32,Word16,Word8) +import Data.Word.Zigzag (toZigzagNative,toZigzag32,toZigzag64) import Foreign.C.String (CStringLen) -import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) -import GHC.Exts (Addr#, ByteArray#, Int (I#), Int#, MutableByteArray#, RealWorld, State#, oneShot, (*#), (+#), (-#), (<#), (>=#)) -import GHC.IO (IO (IO), stToIO) +import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder) +import GHC.Exts (MutableByteArray#,Addr#,(*#),oneShot) +import GHC.Exts (Int(I#),Int#,State#,ByteArray#,(>=#)) +import GHC.Exts (RealWorld,(+#),(-#),(<#)) import GHC.Integer.Logarithms.Compat (integerLog2#) -import GHC.Natural (naturalFromInteger, naturalToInteger) -import GHC.ST (ST (ST)) -import GHC.Word (Word (W#), Word8 (W8#)) +import GHC.IO (IO(IO),stToIO) +import GHC.Natural (naturalFromInteger,naturalToInteger) +import GHC.ST (ST(ST)) +import GHC.Word (Word(W#),Word8(W8#)) import Numeric.Natural (Natural) import qualified Compat as C @@ -222,233 +194,186 @@ import qualified Data.Text.Short as TS import qualified GHC.Exts as Exts import qualified Op as Op +#if MIN_VERSION_text(2,0,0) import Data.Text (Text) -import qualified Data.Text.Array as A import qualified Data.Text.Internal as I +import qualified Data.Text.Array as A +#endif -- | Run a builder. run :: - -- | Size of initial chunk (use 4080 if uncertain) - Int -> - -- | Builder - Builder -> - Chunks + Int -- ^ Size of initial chunk (use 4080 if uncertain) + -> Builder -- ^ Builder + -> Chunks run !hint bldr = runOnto hint bldr ChunksNil -{- | Run a builder. The resulting chunks are consed onto the -beginning of an existing sequence of chunks. --} +-- | Run a builder. The resulting chunks are consed onto the +-- beginning of an existing sequence of chunks. runOnto :: - -- | Size of initial chunk (use 4080 if uncertain) - Int -> - -- | Builder - Builder -> - -- | Suffix - Chunks -> - Chunks -runOnto hint@(I# hint#) (Builder f) cs0 = runST $ do + Int -- ^ Size of initial chunk (use 4080 if uncertain) + -> Builder -- ^ Builder + -> Chunks -- ^ Suffix + -> Chunks +runOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) reverseCommitsOntoChunks cs0 cs -{- | Variant of 'runOnto' that additionally returns the number of bytes -consed onto the suffix. --} +-- | Variant of 'runOnto' that additionally returns the number of bytes +-- consed onto the suffix. runOntoLength :: - -- | Size of initial chunk (use 4080 if uncertain) - Int -> - -- | Builder - Builder -> - -- | Suffix - Chunks -> - (Int, Chunks) -runOntoLength hint@(I# hint#) (Builder f) cs0 = runST $ do + Int -- ^ Size of initial chunk (use 4080 if uncertain) + -> Builder -- ^ Builder + -> Chunks -- ^ Suffix + -> (Int,Chunks) +runOntoLength hint@(I# hint# ) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) let !n = addCommitsLength 0 cs ch <- reverseCommitsOntoChunks cs0 cs - pure (n, ch) + pure (n,ch) -{- | Variant of 'runOnto' that conses the additional chunks -in reverse order. --} +-- | Variant of 'runOnto' that conses the additional chunks +-- in reverse order. reversedOnto :: - -- | Size of initial chunk (use 4080 if uncertain) - Int -> - -- | Builder - Builder -> - Chunks -> - Chunks -reversedOnto hint@(I# hint#) (Builder f) cs0 = runST $ do + Int -- ^ Size of initial chunk (use 4080 if uncertain) + -> Builder -- ^ Builder + -> Chunks + -> Chunks +reversedOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) commitsOntoChunks cs0 cs -{- | Run a builder against lots of elements. This fills the same -underlying buffer over and over again. Do not let the argument to -the callback escape from the callback (i.e. do not write it to an -@IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable -byte arrays in the callback. The intent is that the callback will -write the buffer out. --} -putMany :: - (Foldable f) => - -- | Size of shared chunk (use 8176 if uncertain) - Int -> - -- | Value builder - (a -> Builder) -> - -- | Collection of values - f a -> - -- | Consume chunks. - (MutableBytes RealWorld -> IO b) -> - IO () -{-# INLINE putMany #-} +-- | Run a builder against lots of elements. This fills the same +-- underlying buffer over and over again. Do not let the argument to +-- the callback escape from the callback (i.e. do not write it to an +-- @IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable +-- byte arrays in the callback. The intent is that the callback will +-- write the buffer out. +putMany :: Foldable f + => Int -- ^ Size of shared chunk (use 8176 if uncertain) + -> (a -> Builder) -- ^ Value builder + -> f a -- ^ Collection of values + -> (MutableBytes RealWorld -> IO b) -- ^ Consume chunks. + -> IO () +{-# inline putMany #-} putMany hint0 g xs cb = do MutableByteArray buf0 <- PM.newByteArray hint - BuilderState bufZ offZ _ cmtsZ <- - foldlM - ( \st0 a -> do - st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 - case cmts of - Initial -> - if I# off < threshold - then pure st1 - else do - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 0# hint# Initial) - _ -> do - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- PM.newByteArray total - stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) - r <- stToIO (copyReverseCommits large doff0 cmts) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 0# hint# Initial) - _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) - ) - (BuilderState buf0 0# hint# Initial) - xs + BuilderState bufZ offZ _ cmtsZ <- foldlM + (\st0 a -> do + st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 + case cmts of + Initial -> if I# off < threshold + then pure st1 + else do + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 0# hint# Initial) + _ -> do + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- PM.newByteArray total + stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) + r <- stToIO (copyReverseCommits large doff0 cmts) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 0# hint# Initial) + _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) + ) (BuilderState buf0 0# hint# Initial) xs _ <- case cmtsZ of Initial -> cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) pure () - where + where !hint@(I# hint#) = max hint0 8 !threshold = div (hint * 3) 4 putManyError :: SomeException -{-# NOINLINE putManyError #-} -putManyError = - toException - (userError "bytebuild: putMany implementation error") +{-# noinline putManyError #-} +putManyError = toException + (userError "bytebuild: putMany implementation error") -{- | Variant of 'putMany' that prefixes each pushed array of chunks -with the number of bytes that the chunks in each batch required. -(This excludes the bytes required to encode the length itself.) -This is useful for chunked HTTP encoding. --} -putManyConsLength :: - (Foldable f, MonadIO m) => - -- | Number of bytes used by the serialization of the length - Arithmetic.Nat n -> - -- | Length serialization function - (Int -> Bounded.Builder n) -> - -- | Size of shared chunk (use 8176 if uncertain) - Int -> - -- | Value builder - (a -> Builder) -> - -- | Collection of values - f a -> - -- | Consume chunks. - (MutableBytes RealWorld -> m b) -> - m () -{-# INLINE putManyConsLength #-} +-- | Variant of 'putMany' that prefixes each pushed array of chunks +-- with the number of bytes that the chunks in each batch required. +-- (This excludes the bytes required to encode the length itself.) +-- This is useful for chunked HTTP encoding. +putManyConsLength :: (Foldable f, MonadIO m) + => Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length + -> (Int -> Bounded.Builder n) -- ^ Length serialization function + -> Int -- ^ Size of shared chunk (use 8176 if uncertain) + -> (a -> Builder) -- ^ Value builder + -> f a -- ^ Collection of values + -> (MutableBytes RealWorld -> m b) -- ^ Consume chunks. + -> m () +{-# inline putManyConsLength #-} putManyConsLength n buildSize hint g xs cb = do - let !(I# n#) = Nat.demote n - let !(I# actual#) = max hint (I# n#) + let !(I# n# ) = Nat.demote n + let !(I# actual# ) = max hint (I# n# ) let !threshold = div (I# actual# * 3) 4 - MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual#)) - BuilderState bufZ offZ _ cmtsZ <- - foldlM - ( \st0 a -> do - st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) - case cmts of - Initial -> - if I# off < threshold - then pure st1 - else do - let !dist = off -# n# - _ <- - liftIO $ - stToIO $ - UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) - 0 - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 n# (actual# -# n#) Initial) - _ -> do - let !dist = commitDistance1 buf0 n# buf off cmts - _ <- - liftIO $ - stToIO $ - UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) - 0 - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- liftIO (PM.newByteArray total) - liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) - r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 n# (actual# -# n#) Initial) - _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) - ) - (BuilderState buf0 n# (actual# -# n#) Initial) - xs + MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual# )) + BuilderState bufZ offZ _ cmtsZ <- foldlM + (\st0 a -> do + st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) + case cmts of + Initial -> if I# off < threshold + then pure st1 + else do + let !dist = off -# n# + _ <- liftIO $ stToIO $ UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) 0 + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 n# (actual# -# n# ) Initial) + _ -> do + let !dist = commitDistance1 buf0 n# buf off cmts + _ <- liftIO $ stToIO $ UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) 0 + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- liftIO (PM.newByteArray total) + liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) + r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 n# (actual# -# n# ) Initial) + _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) + ) (BuilderState buf0 n# (actual# -# n# ) Initial) xs _ <- case cmtsZ of Initial -> do let !distZ = offZ -# n# - _ <- - liftIO $ - stToIO $ - UnsafeBounded.pasteST - (buildSize (fromIntegral (I# distZ))) - (MutableByteArray buf0) - 0 + _ <- liftIO $ stToIO $ UnsafeBounded.pasteST + (buildSize (fromIntegral (I# distZ))) + (MutableByteArray buf0) + 0 cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) pure () -{- | Convert a bounded builder to an unbounded one. If the size -is a constant, use @Arithmetic.Nat.constant@ as the first argument -to let GHC conjure up this value for you. --} +-- | Convert a bounded builder to an unbounded one. If the size +-- is a constant, use @Arithmetic.Nat.constant@ as the first argument +-- to let GHC conjure up this value for you. fromBounded :: - Arithmetic.Nat n -> - Bounded.Builder n -> - Builder -{-# INLINE fromBounded #-} + Arithmetic.Nat n + -> Bounded.Builder n + -> Builder +{-# inline fromBounded #-} fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(I# req) = Nat.demote n !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> - let !(I# lenX) = max 4080 (I# req) - in case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> let !(I# lenX) = max 4080 (I# req) in + case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -457,9 +382,9 @@ fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- Use this instead of fromBounded (where possible) leads to marginally -- better results in benchmarks. fromBoundedOne :: - Bounded.Builder 1 -> - Builder -{-# INLINE fromBoundedOne #-} + Bounded.Builder 1 + -> Builder +{-# inline fromBoundedOne #-} fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 of 0# -> case Exts.newByteArray# 4080# s0 of @@ -476,35 +401,31 @@ byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a)) -- | Create a builder from a short bytestring. Implemented with 'bytes'. shortByteString :: ShortByteString -> Builder shortByteString (SBS x) = bytes (Bytes a 0 (PM.sizeofByteArray a)) - where - a = ByteArray x + where a = ByteArray x -{- | Create a builder from a sliced byte sequence. The variants -'copy' and 'insert' provide more control over whether or not -the byte sequence is copied or aliased. This function is preferred -when the user does not know the size of the byte sequence. --} +-- | Create a builder from a sliced byte sequence. The variants +-- 'copy' and 'insert' provide more control over whether or not +-- the byte sequence is copied or aliased. This function is preferred +-- when the user does not know the size of the byte sequence. bytes :: Bytes -> Builder -bytes (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = - Builder - -- There are three cases to consider: (1) there is not enough - -- space and (1a) the chunk is not small or (1b) the chunk is - -- small; (2) There is enough space for a copy. - ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case slen# >=# 256# of - 1# -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - _ -> case Exts.newByteArray# 4080# s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) - _ -> - let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 - in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) +bytes (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder + -- There are three cases to consider: (1) there is not enough + -- space and (1a) the chunk is not small or (1b) the chunk is + -- small; (2) There is enough space for a copy. + (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case slen# >=# 256# of + 1# -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + _ -> case Exts.newByteArray# 4080# s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) + _ -> let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in + (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) -- | Paste byte chunks into a builder. chunks :: Chunks -> Builder -{-# NOINLINE chunks #-} +{-# noinline chunks #-} chunks xs0 = -- Implementation note: It would probably be good to begin with a -- goCopying phase before switching to goInserting. If the total @@ -513,8 +434,8 @@ chunks xs0 = -- Note: This function needs a test in the test suite. Builder $ \buf0 off0 len0 cs0 s0 -> case xs0 of ChunksNil -> (# s0, buf0, off0, len0, cs0 #) - ChunksCons {} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 - where + ChunksCons{} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 + where -- Notice that goNoncopying does not take a buffer as an argument. At the -- very end, we create a 128-byte buffer with nothing in it and present -- that as the new buffer. We *cannot* simply reuse the old buffer with @@ -525,84 +446,74 @@ chunks xs0 = goInserting (ChunksCons (Bytes (ByteArray b) (I# off) (I# len)) ys) !cs s0 = goInserting ys (Immutable b off len cs) s0 -{- | Create a builder from a byte sequence. This always results in a -call to @memcpy@. This is beneficial when the byte sequence is -known to be small (less than 256 bytes). --} +-- | Create a builder from a byte sequence. This always results in a +-- call to @memcpy@. This is beneficial when the byte sequence is +-- known to be small (less than 256 bytes). copy :: Bytes -> Builder -copy (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = - Builder - ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> - let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 - in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +copy (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder + (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in + (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 -{- | Variant of 'copy' that additionally pastes an extra byte in -front of the bytes. --} +-- | Variant of 'copy' that additionally pastes an extra byte in +-- front of the bytes. copyCons :: Word8 -> Bytes -> Builder -copyCons (W8# w0) (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = - Builder - ( \buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of - s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of - s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) - _ -> - let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 - !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 - in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) - ) - where +copyCons (W8# w0) (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder + (\buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of + s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of + s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) + _ -> let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 + !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 + in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) + ) + where !(I# newSz) = max ((I# slen#) + 1) 4080 cstring# :: Addr# -> Builder -{-# INLINE cstring# #-} +{-# inline cstring# #-} cstring# x = cstring (Exts.Ptr x) -{- | Create a builder from a C string with explicit length. The builder -must be executed before the C string is freed. --} +-- | Create a builder from a C string with explicit length. The builder +-- must be executed before the C string is freed. cstringLen :: CStringLen -> Builder -cstringLen (Exts.Ptr src#, I# slen#) = - Builder - ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> - let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 - in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +cstringLen (Exts.Ptr src#, I# slen# ) = Builder + (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 in + (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 -{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. -Specifically segment the input bytes inot 7-bit groups (lowest-to-highest -index byte, most-to-least significant bit within a byte), pads the last group -with trailing zeros, and forms octects by prepending a zero to each group. - -The name was chosen because this pads the input bits with zeros on the right, -and also because this was likely the originally-indended behavior of the -SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a -multiple of seven, as in this variant, is consistent with base64 encodings -(which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). --} +-- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +-- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +-- index byte, most-to-least significant bit within a byte), pads the last group +-- with trailing zeros, and forms octects by prepending a zero to each group. +-- +-- The name was chosen because this pads the input bits with zeros on the right, +-- and also because this was likely the originally-indended behavior of the +-- SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a +-- multiple of seven, as in this variant, is consistent with base64 encodings +-- (which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). sevenEightRight :: Bytes -> Builder sevenEightRight bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !_ | nBits <= 0 = mempty go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -611,29 +522,28 @@ sevenEightRight bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) -{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. -Specifically segment the input bytes inot 7-bit groups (lowest-to-highest -index byte, most-to-least significant bit within a byte), then pad each group -with zeros on the left until each group is an octet. - -The name was chosen because this is the implementation that is used (probably -unintentionally) in the reference SMILE implementation, and so is expected tp -be accepted by existing SMILE consumers. --} +-- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +-- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +-- index byte, most-to-least significant bit within a byte), then pad each group +-- with zeros on the left until each group is an octet. +-- +-- The name was chosen because this is the implementation that is used (probably +-- unintentionally) in the reference SMILE implementation, and so is expected tp +-- be accepted by existing SMILE consumers. sevenEightSmile :: Bytes -> Builder sevenEightSmile bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !w | nBits == 0 = mempty | nBits < 7 = go 7 (unsafeShiftR w (7 - nBits)) go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -642,51 +552,44 @@ sevenEightSmile bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) -{- | Create a builder from two byte sequences. This always results in two -calls to @memcpy@. This is beneficial when the byte sequences are -known to be small (less than 256 bytes). --} +-- | Create a builder from two byte sequences. This always results in two +-- calls to @memcpy@. This is beneficial when the byte sequences are +-- known to be small (less than 256 bytes). copy2 :: Bytes -> Bytes -> Builder -copy2 - (Bytes (ByteArray srcA#) (I# soffA#) (I# slenA#)) - (Bytes (ByteArray srcB#) (I# soffB#) (I# slenB#)) = - Builder - ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of - s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of - s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> - let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 - !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA#) slenB# s1 - in (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where - !slen# = slenA# +# slenB# - !(I# newSz) = max (I# slen#) 4080 +copy2 (Bytes (ByteArray srcA# ) (I# soffA# ) (I# slenA# )) + (Bytes (ByteArray srcB# ) (I# soffB# ) (I# slenB# )) = Builder + (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of + s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of + s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 + !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA# ) slenB# s1 in + (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where + !slen# = slenA# +# slenB# + !(I# newSz) = max (I# slen#) 4080 -{- | Create a builder from a byte sequence. This never calls @memcpy@. -Instead, it pushes a chunk that references the argument byte sequence. -This wastes the remaining space in the active chunk, so it may adversely -affect performance if used carelessly. See 'flush' for a way to mitigate -this problem. This functions is most beneficial when the byte sequence -is known to be large (more than 8192 bytes). --} +-- | Create a builder from a byte sequence. This never calls @memcpy@. +-- Instead, it pushes a chunk that references the argument byte sequence. +-- This wastes the remaining space in the active chunk, so it may adversely +-- affect performance if used carelessly. See 'flush' for a way to mitigate +-- this problem. This functions is most beneficial when the byte sequence +-- is known to be large (more than 8192 bytes). insert :: Bytes -> Builder -insert (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = - Builder - ( \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> - (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - ) +insert (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder + (\buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> + (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + ) -{- | Create a builder from a slice of an array of 'Word8'. There is the same -as 'bytes' but is provided as a convenience for users working with different -types. --} +-- | Create a builder from a slice of an array of 'Word8'. There is the same +-- as 'bytes' but is provided as a convenience for users working with different +-- types. word8Array :: PrimArray Word8 -> Int -> Int -> Builder word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len) @@ -761,134 +664,129 @@ word16ArrayBE src@(PrimArray arr) soff0 slen0 = case targetByteOrder of word16ArraySwap :: PrimArray Word16 -> Int -> Int -> Builder word16ArraySwap src soff0 slen0 = fromFunction (slen0 * 2) (go (soff0 * 2) ((soff0 + slen0) * 2)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = - if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - PM.writeByteArray dst doff v1 - PM.writeByteArray dst (doff + 1) v0 - go (soff + 2) send dst (doff + 2) - else pure doff + go !soff !send !dst !doff = if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + PM.writeByteArray dst doff v1 + PM.writeByteArray dst (doff + 1) v0 + go (soff + 2) send dst (doff + 2) + else pure doff word32ArraySwap :: PrimArray Word32 -> Int -> Int -> Builder word32ArraySwap src soff0 slen0 = fromFunction (slen0 * 4) (go (soff0 * 4) ((soff0 + slen0) * 4)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = - if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - PM.writeByteArray dst doff v3 - PM.writeByteArray dst (doff + 1) v2 - PM.writeByteArray dst (doff + 2) v1 - PM.writeByteArray dst (doff + 3) v0 - go (soff + 4) send dst (doff + 4) - else pure doff + go !soff !send !dst !doff = if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + PM.writeByteArray dst doff v3 + PM.writeByteArray dst (doff + 1) v2 + PM.writeByteArray dst (doff + 2) v1 + PM.writeByteArray dst (doff + 3) v0 + go (soff + 4) send dst (doff + 4) + else pure doff word64ArraySwap :: PrimArray Word64 -> Int -> Int -> Builder word64ArraySwap src soff0 slen0 = fromFunction (slen0 * 8) (go (soff0 * 8) ((soff0 + slen0) * 8)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = - if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - PM.writeByteArray dst doff v7 - PM.writeByteArray dst (doff + 1) v6 - PM.writeByteArray dst (doff + 2) v5 - PM.writeByteArray dst (doff + 3) v4 - PM.writeByteArray dst (doff + 4) v3 - PM.writeByteArray dst (doff + 5) v2 - PM.writeByteArray dst (doff + 6) v1 - PM.writeByteArray dst (doff + 7) v0 - go (soff + 8) send dst (doff + 8) - else pure doff + go !soff !send !dst !doff = if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + PM.writeByteArray dst doff v7 + PM.writeByteArray dst (doff + 1) v6 + PM.writeByteArray dst (doff + 2) v5 + PM.writeByteArray dst (doff + 3) v4 + PM.writeByteArray dst (doff + 4) v3 + PM.writeByteArray dst (doff + 5) v2 + PM.writeByteArray dst (doff + 6) v1 + PM.writeByteArray dst (doff + 7) v0 + go (soff + 8) send dst (doff + 8) + else pure doff word128ArraySwap :: PrimArray Word128 -> Int -> Int -> Builder word128ArraySwap src soff0 slen0 = fromFunction (slen0 * 16) (go (soff0 * 16) ((soff0 + slen0) * 16)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = - if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - v8 = PM.indexPrimArray (asWord8s src) (soff + 8) - v9 = PM.indexPrimArray (asWord8s src) (soff + 9) - v10 = PM.indexPrimArray (asWord8s src) (soff + 10) - v11 = PM.indexPrimArray (asWord8s src) (soff + 11) - v12 = PM.indexPrimArray (asWord8s src) (soff + 12) - v13 = PM.indexPrimArray (asWord8s src) (soff + 13) - v14 = PM.indexPrimArray (asWord8s src) (soff + 14) - v15 = PM.indexPrimArray (asWord8s src) (soff + 15) - PM.writeByteArray dst doff v15 - PM.writeByteArray dst (doff + 1) v14 - PM.writeByteArray dst (doff + 2) v13 - PM.writeByteArray dst (doff + 3) v12 - PM.writeByteArray dst (doff + 4) v11 - PM.writeByteArray dst (doff + 5) v10 - PM.writeByteArray dst (doff + 6) v9 - PM.writeByteArray dst (doff + 7) v8 - PM.writeByteArray dst (doff + 8) v7 - PM.writeByteArray dst (doff + 9) v6 - PM.writeByteArray dst (doff + 10) v5 - PM.writeByteArray dst (doff + 11) v4 - PM.writeByteArray dst (doff + 12) v3 - PM.writeByteArray dst (doff + 13) v2 - PM.writeByteArray dst (doff + 14) v1 - PM.writeByteArray dst (doff + 15) v0 - go (soff + 16) send dst (doff + 16) - else pure doff + go !soff !send !dst !doff = if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + v8 = PM.indexPrimArray (asWord8s src) (soff + 8) + v9 = PM.indexPrimArray (asWord8s src) (soff + 9) + v10 = PM.indexPrimArray (asWord8s src) (soff + 10) + v11 = PM.indexPrimArray (asWord8s src) (soff + 11) + v12 = PM.indexPrimArray (asWord8s src) (soff + 12) + v13 = PM.indexPrimArray (asWord8s src) (soff + 13) + v14 = PM.indexPrimArray (asWord8s src) (soff + 14) + v15 = PM.indexPrimArray (asWord8s src) (soff + 15) + PM.writeByteArray dst doff v15 + PM.writeByteArray dst (doff + 1) v14 + PM.writeByteArray dst (doff + 2) v13 + PM.writeByteArray dst (doff + 3) v12 + PM.writeByteArray dst (doff + 4) v11 + PM.writeByteArray dst (doff + 5) v10 + PM.writeByteArray dst (doff + 6) v9 + PM.writeByteArray dst (doff + 7) v8 + PM.writeByteArray dst (doff + 8) v7 + PM.writeByteArray dst (doff + 9) v6 + PM.writeByteArray dst (doff + 10) v5 + PM.writeByteArray dst (doff + 11) v4 + PM.writeByteArray dst (doff + 12) v3 + PM.writeByteArray dst (doff + 13) v2 + PM.writeByteArray dst (doff + 14) v1 + PM.writeByteArray dst (doff + 15) v0 + go (soff + 16) send dst (doff + 16) + else pure doff word256ArraySwap :: PrimArray Word256 -> Int -> Int -> Builder word256ArraySwap src soff0 slen0 = fromFunction (slen0 * 32) (go (soff0 * 32) ((soff0 + slen0) * 32)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = - if soff < send - then do - let loop !i - | i < 32 = do - let v = PM.indexPrimArray (asWord8s src) (soff + i) - PM.writeByteArray dst (doff + (31 - i)) v - loop (i + 1) - | otherwise = pure () - loop 0 - go (soff + 32) send dst (doff + 32) - else pure doff + go !soff !send !dst !doff = if soff < send + then do + let loop !i + | i < 32 = do + let v = PM.indexPrimArray (asWord8s src) (soff + i) + PM.writeByteArray dst (doff + (31 - i)) v + loop (i + 1) + | otherwise = pure () + loop 0 + go (soff + 32) send dst (doff + 32) + else pure doff asWord8s :: PrimArray a -> PrimArray Word8 asWord8s (PrimArray x) = PrimArray x @@ -896,43 +794,38 @@ asWord8s (PrimArray x) = PrimArray x -- Internal function. Precondition, the referenced slice of the -- byte sequence is UTF-8 encoded text. slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder -{-# NOINLINE slicedUtf8TextJson #-} -slicedUtf8TextJson !src# !soff0# !slen0# = - fromFunction# - reqLen# - (\dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0#) - where +{-# noinline slicedUtf8TextJson #-} +slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction# reqLen# + ( \dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# ) + where -- We multiply by 6 because, in the worst case, everything might be in the -- unprintable ASCII range. The plus 2 is for the quotes on the ends. - !reqLen# = (6# *# slen0#) +# 2# + !reqLen# = (6# *# slen0# ) +# 2# -{- | Constructor for 'Builder' that works on a function with lifted -arguments instead of unlifted ones. This is just as unsafe as the -actual constructor. --} +-- | Constructor for 'Builder' that works on a function with lifted +-- arguments instead of unlifted ones. This is just as unsafe as the +-- actual constructor. fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder -{-# INLINE fromFunction #-} +{-# inline fromFunction #-} fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> - let !(I# lenX) = max 4080 (I# req) - in case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> let !(I# lenX) = max 4080 (I# req) in + case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) -fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> Builder -{-# INLINE fromFunction# #-} +fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) ) -> Builder +{-# inline fromFunction# #-} fromFunction# req f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> - let !(I# lenX) = max 4080 (I# req) - in case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> let !(I# lenX) = max 4080 (I# req) in + case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -942,222 +835,192 @@ shortTextUtf8 a = let ba = shortTextToByteArray a in bytes (Bytes ba 0 (PM.sizeofByteArray ba)) +#if MIN_VERSION_text(2,0,0) -- | Create a builder from text. The text will be UTF-8 encoded. textUtf8 :: Text -> Builder textUtf8 (I.Text (A.ByteArray b) off len) = bytes (Bytes (ByteArray b) off len) +#endif -{- | Create a builder from text. The text will be UTF-8 encoded, -and JSON special characters will be escaped. Additionally, the -result is surrounded by double quotes. For example: - -* @foo ==\> "foo"@ (no escape sequences) -* @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) -* @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) --} +-- | Create a builder from text. The text will be UTF-8 encoded, +-- and JSON special characters will be escaped. Additionally, the +-- result is surrounded by double quotes. For example: +-- +-- * @foo ==\> "foo"@ (no escape sequences) +-- * @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) +-- * @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) shortTextJsonString :: ShortText -> Builder -{-# INLINE shortTextJsonString #-} +{-# inline shortTextJsonString #-} shortTextJsonString a = let !(ByteArray ba) = shortTextToByteArray a !(I# len) = PM.sizeofByteArray (ByteArray ba) in slicedUtf8TextJson ba 0# len -textJsonString :: Text -> Builder -{-# INLINE textJsonString #-} -textJsonString (I.Text (A.ByteArray ba) (I# off) (I# len)) = slicedUtf8TextJson ba off len - -{- | Encodes an unsigned 64-bit integer as decimal. -This encoding never starts with a zero unless the -argument was zero. --} +-- | Encodes an unsigned 64-bit integer as decimal. +-- This encoding never starts with a zero unless the +-- argument was zero. word64Dec :: Word64 -> Builder word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w) -{- | Encodes an unsigned 16-bit integer as decimal. -This encoding never starts with a zero unless the -argument was zero. --} +-- | Encodes an unsigned 16-bit integer as decimal. +-- This encoding never starts with a zero unless the +-- argument was zero. word32Dec :: Word32 -> Builder word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w) -{- | Encodes an unsigned 16-bit integer as decimal. -This encoding never starts with a zero unless the -argument was zero. --} +-- | Encodes an unsigned 16-bit integer as decimal. +-- This encoding never starts with a zero unless the +-- argument was zero. word16Dec :: Word16 -> Builder word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w) -{- | Encodes an unsigned 8-bit integer as decimal. -This encoding never starts with a zero unless the -argument was zero. --} +-- | Encodes an unsigned 8-bit integer as decimal. +-- This encoding never starts with a zero unless the +-- argument was zero. word8Dec :: Word8 -> Builder word8Dec w = fromBounded Nat.constant (Bounded.word8Dec w) -{- | Encodes an unsigned machine-sized integer as decimal. -This encoding never starts with a zero unless the -argument was zero. --} +-- | Encodes an unsigned machine-sized integer as decimal. +-- This encoding never starts with a zero unless the +-- argument was zero. wordDec :: Word -> Builder wordDec w = fromBounded Nat.constant (Bounded.wordDec w) -{- | Encode a double-floating-point number, using decimal notation or -scientific notation depending on the magnitude. This has undefined -behavior when representing @+inf@, @-inf@, and @NaN@. It will not -crash, but the generated numbers will be nonsense. --} +-- | Encode a double-floating-point number, using decimal notation or +-- scientific notation depending on the magnitude. This has undefined +-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not +-- crash, but the generated numbers will be nonsense. doubleDec :: Double -> Builder doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w) -{- | Encodes a signed 64-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encodes a signed 64-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int64Dec :: Int64 -> Builder int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w) -{- | Encodes a signed 32-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encodes a signed 32-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int32Dec :: Int32 -> Builder int32Dec w = fromBounded Nat.constant (Bounded.int32Dec w) -{- | Encodes a signed 16-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encodes a signed 16-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int16Dec :: Int16 -> Builder int16Dec w = fromBounded Nat.constant (Bounded.int16Dec w) -{- | Encodes a signed 8-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encodes a signed 8-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int8Dec :: Int8 -> Builder int8Dec w = fromBounded Nat.constant (Bounded.int8Dec w) -{- | Encodes a signed machine-sized integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encodes a signed machine-sized integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. intDec :: Int -> Builder intDec w = fromBounded Nat.constant (Bounded.intDec w) -{- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding -the encoding to 16 digits. This uses uppercase for the alphabetical -digits. For example, this encodes the number 1022 as @00000000000003FE@. --} +-- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding +-- the encoding to 16 digits. This uses uppercase for the alphabetical +-- digits. For example, this encodes the number 1022 as @00000000000003FE@. word64PaddedUpperHex :: Word64 -> Builder word64PaddedUpperHex w = fromBounded Nat.constant (Bounded.word64PaddedUpperHex w) -{- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding -the encoding to 8 digits. This uses uppercase for the alphabetical -digits. For example, this encodes the number 1022 as @000003FE@. --} +-- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding +-- the encoding to 8 digits. This uses uppercase for the alphabetical +-- digits. For example, this encodes the number 1022 as @000003FE@. word32PaddedUpperHex :: Word32 -> Builder word32PaddedUpperHex w = fromBounded Nat.constant (Bounded.word32PaddedUpperHex w) -{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding -the encoding to 4 digits. This uses uppercase for the alphabetical -digits. For example, this encodes the number 1022 as @03FE@. --} +-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +-- the encoding to 4 digits. This uses uppercase for the alphabetical +-- digits. For example, this encodes the number 1022 as @03FE@. word16PaddedUpperHex :: Word16 -> Builder word16PaddedUpperHex w = fromBounded Nat.constant (Bounded.word16PaddedUpperHex w) -{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding -the encoding to 4 digits. This uses lowercase for the alphabetical -digits. For example, this encodes the number 1022 as @03fe@. --} +-- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +-- the encoding to 4 digits. This uses lowercase for the alphabetical +-- digits. For example, this encodes the number 1022 as @03fe@. word16PaddedLowerHex :: Word16 -> Builder word16PaddedLowerHex w = fromBounded Nat.constant (Bounded.word16PaddedLowerHex w) -{- | Encode a 16-bit unsigned integer as hexadecimal without leading -zeroes. This uses lowercase for the alphabetical digits. For -example, this encodes the number 1022 as @3fe@. --} +-- | Encode a 16-bit unsigned integer as hexadecimal without leading +-- zeroes. This uses lowercase for the alphabetical digits. For +-- example, this encodes the number 1022 as @3fe@. word16LowerHex :: Word16 -> Builder word16LowerHex w = fromBounded Nat.constant (Bounded.word16LowerHex w) -{- | Encode a 16-bit unsigned integer as hexadecimal without leading -zeroes. This uses uppercase for the alphabetical digits. For -example, this encodes the number 1022 as @3FE@. --} +-- | Encode a 16-bit unsigned integer as hexadecimal without leading +-- zeroes. This uses uppercase for the alphabetical digits. For +-- example, this encodes the number 1022 as @3FE@. word16UpperHex :: Word16 -> Builder word16UpperHex w = fromBounded Nat.constant (Bounded.word16UpperHex w) -{- | Encode a 16-bit unsigned integer as hexadecimal without leading -zeroes. This uses lowercase for the alphabetical digits. For -example, this encodes the number 1022 as @3FE@. --} +-- | Encode a 16-bit unsigned integer as hexadecimal without leading +-- zeroes. This uses lowercase for the alphabetical digits. For +-- example, this encodes the number 1022 as @3FE@. word8LowerHex :: Word8 -> Builder word8LowerHex w = fromBounded Nat.constant (Bounded.word8LowerHex w) -{- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding -the encoding to 2 digits. This uses uppercase for the alphabetical -digits. For example, this encodes the number 11 as @0B@. --} +-- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding +-- the encoding to 2 digits. This uses uppercase for the alphabetical +-- digits. For example, this encodes the number 11 as @0B@. word8PaddedUpperHex :: Word8 -> Builder word8PaddedUpperHex w = fromBounded Nat.constant (Bounded.word8PaddedUpperHex w) -{- | Encode an ASCII char. -Precondition: Input must be an ASCII character. This is not checked. --} +-- | Encode an ASCII char. +-- Precondition: Input must be an ASCII character. This is not checked. ascii :: Char -> Builder ascii c = fromBoundedOne (Bounded.ascii c) -{- | Encode two ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode two ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii2 :: Char -> Char -> Builder ascii2 a b = fromBounded Nat.constant (Bounded.ascii2 a b) -{- | Encode three ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode three ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii3 :: Char -> Char -> Char -> Builder ascii3 a b c = fromBounded Nat.constant (Bounded.ascii3 a b c) -{- | Encode four ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode four ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii4 :: Char -> Char -> Char -> Char -> Builder ascii4 a b c d = fromBounded Nat.constant (Bounded.ascii4 a b c d) -{- | Encode five ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode five ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder ascii5 a b c d e = fromBounded Nat.constant (Bounded.ascii5 a b c d e) -{- | Encode six ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode six ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii6 a b c d e f = fromBounded Nat.constant (Bounded.ascii6 a b c d e f) -{- | Encode seven ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode seven ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii7 a b c d e f g = fromBounded Nat.constant (Bounded.ascii7 a b c d e f g) -{- | Encode eight ASCII characters. -Precondition: Must be an ASCII characters. This is not checked. --} +-- | Encode eight ASCII characters. +-- Precondition: Must be an ASCII characters. This is not checked. ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii8 a b c d e f g h = fromBounded Nat.constant (Bounded.ascii8 a b c d e f g h) @@ -1168,99 +1031,83 @@ char c = fromBounded Nat.constant (Bounded.char c) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -signed integer in a little-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- signed integer in a little-endian fashion. int64LE :: Int64 -> Builder int64LE w = fromBounded Nat.constant (Bounded.int64LE w) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -signed integer in a little-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- signed integer in a little-endian fashion. int32LE :: Int32 -> Builder int32LE w = fromBounded Nat.constant (Bounded.int32LE w) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -signed integer in a little-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- signed integer in a little-endian fashion. int16LE :: Int16 -> Builder int16LE w = fromBounded Nat.constant (Bounded.int16LE w) -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -signed integer in a big-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- signed integer in a big-endian fashion. int64BE :: Int64 -> Builder int64BE w = fromBounded Nat.constant (Bounded.int64BE w) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -signed integer in a big-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- signed integer in a big-endian fashion. int32BE :: Int32 -> Builder int32BE w = fromBounded Nat.constant (Bounded.int32BE w) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -signed integer in a big-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- signed integer in a big-endian fashion. int16BE :: Int16 -> Builder int16BE w = fromBounded Nat.constant (Bounded.int16BE w) -{- | Requires exactly 32 bytes. Dump the octets of a 256-bit -word in a little-endian fashion. --} +-- | Requires exactly 32 bytes. Dump the octets of a 256-bit +-- word in a little-endian fashion. word256LE :: Word256 -> Builder word256LE w = fromBounded Nat.constant (Bounded.word256LE w) -{- | Requires exactly 16 bytes. Dump the octets of a 128-bit -word in a little-endian fashion. --} +-- | Requires exactly 16 bytes. Dump the octets of a 128-bit +-- word in a little-endian fashion. word128LE :: Word128 -> Builder word128LE w = fromBounded Nat.constant (Bounded.word128LE w) -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -word in a little-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- word in a little-endian fashion. word64LE :: Word64 -> Builder word64LE w = fromBounded Nat.constant (Bounded.word64LE w) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -word in a little-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- word in a little-endian fashion. word32LE :: Word32 -> Builder word32LE w = fromBounded Nat.constant (Bounded.word32LE w) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -word in a little-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- word in a little-endian fashion. word16LE :: Word16 -> Builder word16LE w = fromBounded Nat.constant (Bounded.word16LE w) -{- | Requires exactly 32 bytes. Dump the octets of a 256-bit -word in a big-endian fashion. --} +-- | Requires exactly 32 bytes. Dump the octets of a 256-bit +-- word in a big-endian fashion. word256BE :: Word256 -> Builder word256BE w = fromBounded Nat.constant (Bounded.word256BE w) -{- | Requires exactly 16 bytes. Dump the octets of a 128-bit -word in a big-endian fashion. --} +-- | Requires exactly 16 bytes. Dump the octets of a 128-bit +-- word in a big-endian fashion. word128BE :: Word128 -> Builder word128BE w = fromBounded Nat.constant (Bounded.word128BE w) -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -word in a big-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- word in a big-endian fashion. word64BE :: Word64 -> Builder word64BE w = fromBounded Nat.constant (Bounded.word64BE w) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -word in a big-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- word in a big-endian fashion. word32BE :: Word32 -> Builder word32BE w = fromBounded Nat.constant (Bounded.word32BE w) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -word in a big-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- word in a big-endian fashion. word16BE :: Word16 -> Builder word16BE w = fromBounded Nat.constant (Bounded.word16BE w) @@ -1270,14 +1117,11 @@ word8 w = fromBoundedOne (Bounded.word8 w) -- | Prefix a builder with the number of bytes that it requires. consLength :: - -- | Number of bytes used by the serialization of the length - Arithmetic.Nat n -> - -- | Length serialization function - (Int -> Bounded.Builder n) -> - -- | Builder whose length is measured - Builder -> - Builder -{-# INLINE consLength #-} + Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length + -> (Int -> Bounded.Builder n) -- ^ Length serialization function + -> Builder -- ^ Builder whose length is measured + -> Builder +{-# inline consLength #-} consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- There is actually a little bit of unsoundness here. If the number of -- bytes required to encode the length is greater than 4080, this will @@ -1291,200 +1135,170 @@ consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> in case f buf1 (off1 +# lenSz) (len1 -# lenSz) cs1 s1 of (# s2, buf2, off2, len2, cs2 #) -> let !dist = commitDistance1 buf1 (off1 +# lenSz) buf2 off2 cs2 - ST g = - UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf1) - (I# off1) + ST g = UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf1) + (I# off1) in case g s2 of (# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #) -{- | Variant of 'consLength32BE' the encodes the length in -a little-endian fashion. --} +-- | Variant of 'consLength32BE' the encodes the length in +-- a little-endian fashion. consLength32LE :: Builder -> Builder consLength32LE = consLength Nat.constant (\x -> Bounded.word32LE (fromIntegral x)) -{- | Prefix a builder with its size in bytes. This size is -presented as a big-endian 32-bit word. The need to prefix -a builder with its length shows up a numbers of wire protocols -including those of PostgreSQL and Apache Kafka. Note the -equivalence: - -> forall (n :: Int) (x :: Builder). -> let sz = sizeofByteArray (run n (consLength32BE x)) -> consLength32BE x === word32BE (fromIntegral sz) <> x - -However, using 'consLength32BE' is much more efficient here -since it only materializes the 'ByteArray' once. --} +-- | Prefix a builder with its size in bytes. This size is +-- presented as a big-endian 32-bit word. The need to prefix +-- a builder with its length shows up a numbers of wire protocols +-- including those of PostgreSQL and Apache Kafka. Note the +-- equivalence: +-- +-- > forall (n :: Int) (x :: Builder). +-- > let sz = sizeofByteArray (run n (consLength32BE x)) +-- > consLength32BE x === word32BE (fromIntegral sz) <> x +-- +-- However, using 'consLength32BE' is much more efficient here +-- since it only materializes the 'ByteArray' once. consLength32BE :: Builder -> Builder consLength32BE = consLength Nat.constant (\x -> Bounded.word32BE (fromIntegral x)) -{- | Prefix a builder with its size in bytes. This size is -presented as a big-endian 64-bit word. See 'consLength32BE'. --} +-- | Prefix a builder with its size in bytes. This size is +-- presented as a big-endian 64-bit word. See 'consLength32BE'. consLength64BE :: Builder -> Builder consLength64BE = consLength Nat.constant (\x -> Bounded.word64BE (fromIntegral x)) -{- | Push the buffer currently being filled onto the chunk list, -allocating a new active buffer of the requested size. This is -helpful when a small builder is sandwhiched between two large -zero-copy builders: - -> insert bigA <> flush 1 <> word8 0x42 <> insert bigB - -Without @flush 1@, @word8 0x42@ would see the zero-byte active -buffer that 'insert' returned, decide that it needed more space, -and allocate a 4080-byte buffer to which only a single byte -would be written. --} +-- | Push the buffer currently being filled onto the chunk list, +-- allocating a new active buffer of the requested size. This is +-- helpful when a small builder is sandwhiched between two large +-- zero-copy builders: +-- +-- > insert bigA <> flush 1 <> word8 0x42 <> insert bigB +-- +-- Without @flush 1@, @word8 0x42@ would see the zero-byte active +-- buffer that 'insert' returned, decide that it needed more space, +-- and allocate a 4080-byte buffer to which only a single byte +-- would be written. flush :: Int -> Builder flush !reqSz = Builder $ \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# sz# s0 of (# sX, bufX #) -> (# sX, bufX, 0#, sz#, Mutable buf0 off0 cs0 #) - where - !(I# sz#) = max reqSz 0 + where + !(I# sz# ) = max reqSz 0 -- ShortText is already UTF-8 encoded. This is a no-op. shortTextToByteArray :: ShortText -> ByteArray shortTextToByteArray x = case TS.toShortByteString x of SBS a -> ByteArray a -{- | Encode a signed machine-sized integer with LEB-128. This uses -zig-zag encoding. --} +-- | Encode a signed machine-sized integer with LEB-128. This uses +-- zig-zag encoding. intLEB128 :: Int -> Builder -{-# INLINE intLEB128 #-} +{-# inline intLEB128 #-} intLEB128 = wordLEB128 . toZigzagNative -- | Encode a 32-bit signed integer with LEB-128. This uses zig-zag encoding. int32LEB128 :: Int32 -> Builder -{-# INLINE int32LEB128 #-} +{-# inline int32LEB128 #-} int32LEB128 = word32LEB128 . toZigzag32 -- | Encode a 64-bit signed integer with LEB-128. This uses zig-zag encoding. int64LEB128 :: Int64 -> Builder -{-# INLINE int64LEB128 #-} +{-# inline int64LEB128 #-} int64LEB128 = word64LEB128 . toZigzag64 -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder -{-# INLINE wordLEB128 #-} +{-# inline wordLEB128 #-} wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w) --- | Encode a 16-bit word with LEB-128. -word16LEB128 :: Word16 -> Builder -{-# INLINE word16LEB128 #-} -word16LEB128 w = fromBounded Nat.constant (Bounded.word16LEB128 w) - -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder -{-# INLINE word32LEB128 #-} +{-# inline word32LEB128 #-} word32LEB128 w = fromBounded Nat.constant (Bounded.word32LEB128 w) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder -{-# INLINE word64LEB128 #-} +{-# inline word64LEB128 #-} word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w) -- | Encode a machine-sized word with VLQ. wordVlq :: Word -> Builder -{-# INLINE wordVlq #-} +{-# inline wordVlq #-} wordVlq w = fromBounded Nat.constant (Bounded.wordVlq w) -- | Encode a 32-bit word with VLQ. word32Vlq :: Word32 -> Builder -{-# INLINE word32Vlq #-} +{-# inline word32Vlq #-} word32Vlq w = fromBounded Nat.constant (Bounded.word32Vlq w) -- | Encode a 64-bit word with VLQ. word64Vlq :: Word64 -> Builder -{-# INLINE word64Vlq #-} +{-# inline word64Vlq #-} word64Vlq w = fromBounded Nat.constant (Bounded.word64Vlq w) -{- | Encode a signed arbitrary-precision integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Encode a signed arbitrary-precision integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. integerDec :: Integer -> Builder integerDec !i | i < 0 = ascii '-' <> naturalDec (naturalFromInteger (negate i)) | otherwise = naturalDec (naturalFromInteger i) -{- | Encodes an unsigned arbitrary-precision integer as decimal. -This encoding never starts with a zero unless the argument was zero. --} +-- | Encodes an unsigned arbitrary-precision integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. naturalDec :: Natural -> Builder -naturalDec !n0 = - fromEffect - (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) - ( \marr off -> case n0 of - 0 -> do - PM.writeByteArray marr off (0x30 :: Word8) - pure (off + 1) - _ -> go n0 marr off off - ) - where +naturalDec !n0 = fromEffect + (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) + (\marr off -> case n0 of + 0 -> do + PM.writeByteArray marr off (0x30 :: Word8) + pure (off + 1) + _ -> go n0 marr off off + ) + where go :: forall s. Natural -> MutableByteArray s -> Int -> Int -> ST s Int go !n !buf !off0 !off = case quotRem n 1_000_000_000 of - (q, r) -> case q of + (q,r) -> case q of 0 -> do off' <- backwardsWordLoop buf off (fromIntegral @Natural @Word r) reverseBytes buf off0 (off' - 1) pure off' _ -> do - off' <- - backwardsPasteWordPaddedDec9 - (fromIntegral @Natural @Word r) - buf - off + off' <- backwardsPasteWordPaddedDec9 + (fromIntegral @Natural @Word r) buf off go q buf off0 off' -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# INLINE reverseBytes #-} -reverseBytes arr begin end = go begin end - where - go ixA ixB = - if ixA < ixB - then do - a :: Word8 <- PM.readByteArray arr ixA - b :: Word8 <- PM.readByteArray arr ixB - PM.writeByteArray arr ixA b - PM.writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# inline reverseBytes #-} +reverseBytes arr begin end = go begin end where + go ixA ixB = if ixA < ixB + then do + a :: Word8 <- PM.readByteArray arr ixA + b :: Word8 <- PM.readByteArray arr ixB + PM.writeByteArray arr ixA b + PM.writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () backwardsPasteWordPaddedDec9 :: Word -> MutableByteArray s -> Int -> ST s Int backwardsPasteWordPaddedDec9 !w !arr !off = do backwardsPutRem10 - ( backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 $ - backwardsPutRem10 - (\_ _ _ -> pure ()) - ) - arr - off - w + (backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ + backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ + backwardsPutRem10 $ backwardsPutRem10 + (\_ _ _ -> pure ()) + ) arr off w pure (off + 9) backwardsPutRem10 :: - (MutableByteArray s -> Int -> Word -> ST s a) -> - MutableByteArray s -> - Int -> - Word -> - ST s a -{-# INLINE backwardsPutRem10 #-} + (MutableByteArray s -> Int -> Word -> ST s a) + -> MutableByteArray s -> Int -> Word -> ST s a +{-# inline backwardsPutRem10 #-} backwardsPutRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) @@ -1492,31 +1306,25 @@ backwardsPutRem10 andThen arr off dividend = do andThen arr (off + 1) quotient backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# INLINE backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 - where - go !off !(x :: Word) = - if x > 0 - then do - let (y, z) = quotRem x 10 - PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# inline backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 where + go !off !(x :: Word) = if x > 0 + then do + let (y,z) = quotRem x 10 + PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- | Replicate a byte the given number of times. replicate :: - -- | Number of times to replicate the byte - Int -> - -- | Byte to replicate - Word8 -> - Builder -replicate !len !w = - fromEffect - len - ( \marr off -> do - PM.setByteArray marr off len w - pure (off + len) - ) + Int -- ^ Number of times to replicate the byte + -> Word8 -- ^ Byte to replicate + -> Builder +replicate !len !w = fromEffect len + (\marr off -> do + PM.setByteArray marr off len w + pure (off + len) + ) -- Based on C code from https://stackoverflow.com/a/5558614 -- For numbers less than 1073741829, this gives a correct answer. @@ -1538,22 +1346,21 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) -{- | This function and the documentation for it are copied from -Takano Akio's fast-builder library. - -@'rebuild' b@ is equivalent to @b@, but it allows GHC to assume -that @b@ will be run at most once. This can enable various -optimizations that greately improve performance. - -There are two types of typical situations where a use of 'rebuild' -is often a win: - -* When constructing a builder using a recursive function. e.g. - @rebuild $ foldr ...@. -* When constructing a builder using a conditional expression. e.g. - @rebuild $ case x of ... @ --} +-- | This function and the documentation for it are copied from +-- Takano Akio's fast-builder library. +-- +-- @'rebuild' b@ is equivalent to @b@, but it allows GHC to assume +-- that @b@ will be run at most once. This can enable various +-- optimizations that greately improve performance. +-- +-- There are two types of typical situations where a use of 'rebuild' +-- is often a win: +-- +-- * When constructing a builder using a recursive function. e.g. +-- @rebuild $ foldr ...@. +-- * When constructing a builder using a conditional expression. e.g. +-- @rebuild $ case x of ... @ rebuild :: Builder -> Builder -{-# INLINE rebuild #-} +{-# inline rebuild #-} rebuild (Builder f) = Builder $ oneShot $ \a -> oneShot $ \b -> oneShot $ \c -> oneShot $ \d -> oneShot $ \e -> f a b c d e diff --git a/src/Data/Bytes/Builder/Avro.hs b/src/Data/Bytes/Builder/Avro.hs deleted file mode 100644 index f86f350..0000000 --- a/src/Data/Bytes/Builder/Avro.hs +++ /dev/null @@ -1,87 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - -{- | Builders for encoding data with Apache Avro. Most functions in this -module are just aliases for other functions. Avro uses zig-zag LEB128 -for all integral types. --} -module Data.Bytes.Builder.Avro - ( int - , int32 - , int64 - , word16 - , word32 - , word128 - , bytes - , chunks - , text - - -- * Maps - , map2 - ) where - -import Data.Bytes (Bytes) -import Data.Bytes.Builder (Builder) -import Data.Bytes.Chunks (Chunks) -import Data.Int -import Data.Text (Text) -import Data.WideWord (Word128) -import Data.Word - -import qualified Data.Bytes as Bytes -import qualified Data.Bytes.Builder as B -import qualified Data.Bytes.Chunks as Chunks -import qualified Data.Bytes.Text.Utf8 as Utf8 - -int32 :: Int32 -> Builder -int32 = B.int32LEB128 - -int64 :: Int64 -> Builder -int64 = B.int64LEB128 - -int :: Int -> Builder -int = B.intLEB128 - -{- | Note: This results in a zigzag encoded number. Avro does not have -unsigned types. --} -word16 :: Word16 -> Builder -word16 = B.int32LEB128 . fromIntegral - -{- | Note: This results in a zigzag encoded number. Avro does not have -unsigned types. --} -word32 :: Word32 -> Builder -word32 = B.int64LEB128 . fromIntegral - -{- | Note: This results in a @fixed@ encoded value of length 16. In the -schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@. -A big-endian encoding is used. --} -word128 :: Word128 -> Builder -word128 = B.word128BE - -bytes :: Bytes -> Builder -bytes !b = int (Bytes.length b) <> B.bytes b - -chunks :: Chunks -> Builder -chunks !b = int (Chunks.length b) <> B.chunks b - -text :: Text -> Builder -text = bytes . Utf8.fromText - -{- | Encode a map with exactly two key-value pairs. The keys are text. -This is commonly used to encode the header in an avro file, which has -a map with two keys: @avro.schema@ and @avro.codec@. --} -map2 :: - -- | First key - Text -> - -- | First value (already encoded) - Builder -> - -- | Second key - Text -> - -- | Second value (already encoded) - Builder -> - Builder -{-# INLINE map2 #-} -map2 k1 v1 k2 v2 = B.word8 0x04 <> text k1 <> v1 <> text k2 <> v2 <> B.word8 0x00 diff --git a/src/Data/Bytes/Builder/Bounded.hs b/src/Data/Bytes/Builder/Bounded.hs index 5703ef3..c835d95 100644 --- a/src/Data/Bytes/Builder/Bounded.hs +++ b/src/Data/Bytes/Builder/Bounded.hs @@ -1,38 +1,34 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE BinaryLiterals #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE NumericUnderscores #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE UnboxedTuples #-} -{-# LANGUAGE UnliftedFFITypes #-} +{-# language CPP #-} +{-# language BangPatterns #-} +{-# language BinaryLiterals #-} +{-# language DataKinds #-} +{-# language KindSignatures #-} +{-# language LambdaCase #-} +{-# language MagicHash #-} +{-# language NumericUnderscores #-} +{-# language RankNTypes #-} +{-# language ScopedTypeVariables #-} +{-# language TypeApplications #-} +{-# language TypeOperators #-} +{-# language UnboxedTuples #-} +{-# language UnliftedFFITypes #-} -{- | The functions in this module are explict about the maximum number -of bytes they require. --} +-- | The functions in this module are explict about the maximum number +-- of bytes they require. module Data.Bytes.Builder.Bounded ( -- * Builder Builder - -- * Execute , run , runByteString , pasteGrowST - -- * Combine , empty , append - -- * Bounds Manipulation , weaken , substitute - -- * Encode Integral Types - -- ** Human-Readable , word64Dec , word32Dec @@ -44,32 +40,25 @@ module Data.Bytes.Builder.Bounded , int16Dec , int8Dec , intDec - -- * Unsigned Words - -- ** Wide Words , word128PaddedLowerHex , word128PaddedUpperHex , word256PaddedLowerHex , word256PaddedUpperHex - -- ** 64-bit , word64PaddedLowerHex , word64PaddedUpperHex - -- ** 48-bit , word48PaddedLowerHex - -- ** 32-bit , word32PaddedLowerHex , word32PaddedUpperHex - -- ** 16-bit , word16PaddedLowerHex , word16PaddedUpperHex , word16LowerHex , word16UpperHex - -- ** 8-bit , word8PaddedLowerHex , word8PaddedUpperHex @@ -83,18 +72,13 @@ module Data.Bytes.Builder.Bounded , ascii7 , ascii8 , char - -- ** Native , wordPaddedDec2 - , wordPaddedDec3 , wordPaddedDec4 , wordPaddedDec9 - -- ** Machine-Readable - -- *** One , word8 - -- **** Big Endian , word256BE , word128BE @@ -104,7 +88,6 @@ module Data.Bytes.Builder.Bounded , int64BE , int32BE , int16BE - -- **** Little Endian , word256LE , word128LE @@ -114,48 +97,43 @@ module Data.Bytes.Builder.Bounded , int64LE , int32LE , int16LE - -- **** LEB128 - -- | LEB128 encodes an integer in 7-bit units, least significant bits first, -- with the high bit of each output byte set to 1 in all bytes except for -- the final byte. , wordLEB128 - , word16LEB128 , word32LEB128 , word64LEB128 - -- **** VLQ - -- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit -- units, most significant bits first, with the high bit of each output byte -- set to 1 in all bytes except for the final byte. , wordVlq , word32Vlq , word64Vlq - -- * Encode Floating-Point Types , doubleDec ) where -import Arithmetic.Types (type (:=:), type (<=)) +import Arithmetic.Types (type (<=), type (:=:)) import Control.Monad.Primitive (primitive_) import Control.Monad.ST (ST) -import Control.Monad.ST.Run (runByteArrayST, runIntByteArrayST) +import Control.Monad.ST.Run (runByteArrayST,runIntByteArrayST) import Data.Bits +import Data.Bytes.Builder.Bounded.Unsafe (Builder(..)) import Data.ByteString (ByteString) -import Data.Bytes.Builder.Bounded.Unsafe (Builder (..)) -import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (ByteArray, MutableByteArray (..), newByteArray, readByteArray, unsafeFreezeByteArray, writeByteArray) -import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset (..)) -import Data.WideWord (Word128 (Word128), Word256 (Word256)) +import Data.Primitive (MutableByteArray(..),ByteArray,writeByteArray) +import Data.Primitive (readByteArray,newByteArray,unsafeFreezeByteArray) +import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) +import Data.WideWord (Word128(Word128),Word256(Word256)) import GHC.Exts +import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#)) import GHC.IO (unsafeIOToST) -import GHC.Int (Int16 (I16#), Int32 (I32#), Int64 (I64#), Int8 (I8#)) -import GHC.ST (ST (ST)) +import GHC.ST (ST(ST)) import GHC.TypeLits (type (+)) -import GHC.Word (Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#)) +import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#)) +import Data.Bytes.Types (Bytes(Bytes)) import qualified Compat as C @@ -166,53 +144,48 @@ import qualified Data.Bytes as Bytes import qualified Data.Bytes.Builder.Bounded.Unsafe as Unsafe import qualified Data.Primitive as PM -{- | Execute the bounded builder. If the size is a constant, -use @Arithmetic.Nat.constant@ as the first argument to let -GHC conjure up this value for you. --} +-- | Execute the bounded builder. If the size is a constant, +-- use @Arithmetic.Nat.constant@ as the first argument to let +-- GHC conjure up this value for you. run :: - Arithmetic.Nat n -> - -- | Builder - Builder n -> - ByteArray -{-# INLINE run #-} + Arithmetic.Nat n + -> Builder n -- ^ Builder + -> ByteArray +{-# inline run #-} run n b = runByteArrayST $ do arr <- newByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len unsafeFreezeByteArray arr -{- | Variant of 'run' that puts the result in a pinned buffer and -packs it up in a 'ByteString'. --} +-- | Variant of 'run' that puts the result in a pinned buffer and +-- packs it up in a 'ByteString'. runByteString :: - Arithmetic.Nat n -> - -- | Builder - Builder n -> - ByteString -{-# INLINE runByteString #-} + Arithmetic.Nat n + -> Builder n -- ^ Builder + -> ByteString +{-# inline runByteString #-} runByteString n b = - let (finalLen, r) = runIntByteArrayST $ do + let (finalLen,r) = runIntByteArrayST $ do arr <- PM.newPinnedByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len arr' <- unsafeFreezeByteArray arr - pure (len, arr') + pure (len,arr') in Bytes.pinnedToByteString (Bytes r 0 finalLen) -{- | Paste the builder into the byte array starting at offset zero. -This reallocates the byte array if it cannot accomodate the builder, -growing it by the minimum amount necessary. --} +-- | Paste the builder into the byte array starting at offset zero. +-- This reallocates the byte array if it cannot accomodate the builder, +-- growing it by the minimum amount necessary. pasteGrowST :: - Arithmetic.Nat n -> - Builder n -> - -- | Initial buffer, used linearly. Do not reuse this argument. - MutableByteArrayOffset s -> - -- | Final buffer that accomodated the builder. - ST s (MutableByteArrayOffset s) -{-# INLINE pasteGrowST #-} -pasteGrowST n b !(MutableByteArrayOffset {array = arr0, offset = off0}) = do + Arithmetic.Nat n + -> Builder n + -> MutableByteArrayOffset s + -- ^ Initial buffer, used linearly. Do not reuse this argument. + -> ST s (MutableByteArrayOffset s) + -- ^ Final buffer that accomodated the builder. +{-# inline pasteGrowST #-} +pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do sz0 <- PM.getSizeofMutableByteArray arr0 let req = Nat.demote n let sz1 = off0 + req @@ -240,16 +213,15 @@ unsafeAppend (Builder f) (Builder g) = Builder $ \arr off0 s0 -> case f arr off0 s0 of (# s1, r #) -> g arr r s1 -{- | Weaken the bound on the maximum number of bytes required. For example, -to use two builders with unequal bounds in a disjunctive setting: - -> import qualified Arithmetic.Lte as Lte -> -> buildNumber :: Either Double Word64 -> Builder 32 -> buildNumber = \case -> Left d -> doubleDec d -> Right w -> weaken (Lte.constant @19 @32) (word64Dec w) --} +-- | Weaken the bound on the maximum number of bytes required. For example, +-- to use two builders with unequal bounds in a disjunctive setting: +-- +-- > import qualified Arithmetic.Lte as Lte +-- > +-- > buildNumber :: Either Double Word64 -> Builder 32 +-- > buildNumber = \case +-- > Left d -> doubleDec d +-- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w) weaken :: forall m n. (m <= n) -> Builder m -> Builder n weaken !_ (Builder f) = Builder f @@ -257,35 +229,35 @@ weaken !_ (Builder f) = Builder f substitute :: forall m n. (m :=: n) -> Builder m -> Builder n substitute !_ (Builder f) = Builder f -{- | Encode a double-floating-point number, using decimal notation or -scientific notation depending on the magnitude. This has undefined -behavior when representing @+inf@, @-inf@, and @NaN@. It will not -crash, but the generated numbers will be nonsense. --} +-- | Encode a double-floating-point number, using decimal notation or +-- scientific notation depending on the magnitude. This has undefined +-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not +-- crash, but the generated numbers will be nonsense. doubleDec :: Double -> Builder 32 doubleDec (D# d) = Builder (\arr off0 s0 -> doubleDec# d arr off0 s0) -{- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. --} +-- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. word64Dec :: Word64 -> Builder 19 -word64Dec (W64# w) = wordCommonDec# (word64ToWord# w) +word64Dec (W64# w) = wordCommonDec# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif -{- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. --} +-- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. word32Dec :: Word32 -> Builder 10 word32Dec (W32# w) = wordCommonDec# (C.word32ToWord# w) -{- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. --} +-- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. word16Dec :: Word16 -> Builder 5 word16Dec (W16# w) = wordCommonDec# (C.word16ToWord# w) -{- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. --} +-- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. word8Dec :: Word8 -> Builder 3 word8Dec (W8# w) = -- We unroll the loop when encoding Word8s. This speeds things @@ -296,105 +268,106 @@ word8Dec (W8# w) = -- L1 cache in real applications. word8Dec# (C.word8ToWord# w) -{- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer -as decimal. This encoding never starts with a zero unless the argument -was zero. --} +-- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer +-- as decimal. This encoding never starts with a zero unless the argument +-- was zero. wordDec :: Word -> Builder 19 wordDec (W# w) = wordCommonDec# w -{- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int64Dec :: Int64 -> Builder 20 -int64Dec (I64# w) = intCommonDec# (int64ToInt# w) +int64Dec (I64# w) = intCommonDec# +#if MIN_VERSION_base(4,17,0) + (int64ToInt# w) +#else + w +#endif -{- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int32Dec :: Int32 -> Builder 11 int32Dec (I32# w) = intCommonDec# (C.int32ToInt# w) -{- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int16Dec :: Int16 -> Builder 6 int16Dec (I16# w) = intCommonDec# (C.int16ToInt# w) -{- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. -This encoding never starts with a zero unless the argument was zero. -Negative numbers are preceded by a minus sign. Positive numbers -are not preceded by anything. --} +-- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. +-- This encoding never starts with a zero unless the argument was zero. +-- Negative numbers are preceded by a minus sign. Positive numbers +-- are not preceded by anything. int8Dec :: Int8 -> Builder 4 int8Dec (I8# w) = intCommonDec# (C.int8ToInt# w) -{- | Requires up to 20 bytes. Encodes a signed machine-sized integer -as decimal. This encoding never starts with a zero unless the -argument was zero. Negative numbers are preceded by a minus sign. -Positive numbers are not preceded by anything. --} +-- | Requires up to 20 bytes. Encodes a signed machine-sized integer +-- as decimal. This encoding never starts with a zero unless the +-- argument was zero. Negative numbers are preceded by a minus sign. +-- Positive numbers are not preceded by anything. intDec :: Int -> Builder 20 intDec (I# w) = intCommonDec# w word8Dec# :: Word# -> Builder 3 -{-# NOINLINE word8Dec# #-} +{-# noinline word8Dec# #-} word8Dec# w# = Unsafe.construct $ \arr off0 -> do - let !(I# off0#) = off0 - !(!x, !ones) = quotRem w 10 - !(hundreds@(W# hundreds#), tens@(W# tens#)) = quotRem x 10 + let !(I# off0# ) = off0 + !(!x,!ones) = quotRem w 10 + !(hundreds@(W# hundreds# ),tens@(W# tens# )) = quotRem x 10 writeByteArray arr off0 (fromIntegral (hundreds + 0x30) :: Word8) let !hasHundreds = gtWord# hundreds# 0## - !off1@(I# off1#) = I# (off0# +# hasHundreds) + !off1@(I# off1# ) = I# (off0# +# hasHundreds) writeByteArray arr off1 (fromIntegral (tens + 0x30) :: Word8) - let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0##))) + let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0## ))) writeByteArray arr off2 (fromIntegral (ones + 0x30) :: Word8) pure (off2 + 1) - where + where w = W# w# -- Requires a number of bytes that is bounded by the size of -- the word. This is only used internally. wordCommonDec# :: Word# -> Builder n -{-# NOINLINE wordCommonDec# #-} -wordCommonDec# w# = Unsafe.construct $ \arr off0 -> - if w /= 0 - then internalWordLoop arr off0 (W# w#) - else do - writeByteArray arr off0 (c2w '0') - pure (off0 + 1) - where - w = W64# (wordToWord64# w#) +{-# noinline wordCommonDec# #-} +wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0 + then internalWordLoop arr off0 (W# w#) + else do + writeByteArray arr off0 (c2w '0') + pure (off0 + 1) + where + w = W64# +#if MIN_VERSION_base(4,17,0) + (wordToWord64# w#) +#else + w# +#endif internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# INLINE internalWordLoop #-} +{-# inline internalWordLoop #-} internalWordLoop arr off0 x0 = do off1 <- backwardsWordLoop arr off0 x0 reverseBytes arr off0 (off1 - 1) pure off1 backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# INLINE backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 - where - go !off !(x :: Word) = - if x > 0 - then do - let (y, z) = quotRem x 10 - writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# inline backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 where + go !off !(x :: Word) = if x > 0 + then do + let (y,z) = quotRem x 10 + writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- Requires up to 20 bytes. Can be less depending on what the -- size of the argument is known to be. Unsafe. intCommonDec# :: Int# -> Builder n -{-# NOINLINE intCommonDec# #-} +{-# noinline intCommonDec# #-} intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of GT -> internalWordLoop arr off0 (fromIntegral w) EQ -> do @@ -403,8 +376,13 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of LT -> do writeByteArray arr off0 (c2w '-') internalWordLoop arr (off0 + 1) (fromIntegral (negate w)) - where - w = I64# (intToInt64# w#) + where + w = I64# +#if MIN_VERSION_base(4,17,0) + (intToInt64# w#) +#else + w# +#endif -- Convert a number between 0 and 16 to the ASCII -- representation of its hexadecimal character. @@ -412,11 +390,10 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of -- unneeded bitmask. This actually needs a Word64 -- argument. toHexUpper :: Word -> Word8 -toHexUpper w' = - fromIntegral $ - (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = fromIntegral + $ (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 @@ -424,161 +401,162 @@ toHexUpper w' = hiSolved = w + 55 toHexLower :: Word -> Word8 -toHexLower w' = - fromIntegral $ - (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexLower w' = fromIntegral + $ (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 loSolved = w + 48 hiSolved = w + 87 -{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as -hexadecimal, zero-padding the encoding to 64 digits. This uses -lowercase for the alphabetical digits. --} +-- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 64 digits. This uses +-- lowercase for the alphabetical digits. word256PaddedLowerHex :: Word256 -> Builder 64 word256PaddedLowerHex (Word256 w192 w128 w64 w0) = - word64PaddedLowerHex w192 - `append` word64PaddedLowerHex w128 - `append` word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 + word64PaddedLowerHex w192 + `append` word64PaddedLowerHex w128 + `append` word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 -{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as -hexadecimal, zero-padding the encoding to 64 digits. This uses -uppercase for the alphabetical digits. --} +-- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 64 digits. This uses +-- uppercase for the alphabetical digits. word256PaddedUpperHex :: Word256 -> Builder 64 word256PaddedUpperHex (Word256 w192 w128 w64 w0) = - word64PaddedUpperHex w192 - `append` word64PaddedUpperHex w128 - `append` word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 + word64PaddedUpperHex w192 + `append` word64PaddedUpperHex w128 + `append` word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 -{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as -hexadecimal, zero-padding the encoding to 32 digits. This uses -lowercase for the alphabetical digits. --} + +-- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 32 digits. This uses +-- lowercase for the alphabetical digits. word128PaddedLowerHex :: Word128 -> Builder 32 word128PaddedLowerHex (Word128 w64 w0) = - word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 + word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 -{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as -hexadecimal, zero-padding the encoding to 32 digits. This uses -uppercase for the alphabetical digits. --} +-- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 32 digits. This uses +-- uppercase for the alphabetical digits. word128PaddedUpperHex :: Word128 -> Builder 32 word128PaddedUpperHex (Word128 w64 w0) = - word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 + word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 -{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as -hexadecimal, zero-padding the encoding to 16 digits. This uses -uppercase for the alphabetical digits. For example, this encodes the -number 1022 as @00000000000003FE@. --} + +-- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 16 digits. This uses +-- uppercase for the alphabetical digits. For example, this encodes the +-- number 1022 as @00000000000003FE@. word64PaddedUpperHex :: Word64 -> Builder 16 -word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# (word64ToWord# w) +word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif -{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as -hexadecimal, zero-padding the encoding to 16 digits. This uses -lowercase for the alphabetical digits. For example, this encodes the -number 1022 as @00000000000003fe@. --} + +-- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 16 digits. This uses +-- lowercase for the alphabetical digits. For example, this encodes the +-- number 1022 as @00000000000003fe@. word64PaddedLowerHex :: Word64 -> Builder 16 -word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# (word64ToWord# w) +word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif -{- | Requires exactly 12 bytes. Discards the upper 16 bits of a -64-bit unsigned integer and then encodes the lower 48 bits as -hexadecimal, zero-padding the encoding to 12 digits. This uses -lowercase for the alphabetical digits. For example, this encodes the -number 1022 as @0000000003fe@. --} +-- | Requires exactly 12 bytes. Discards the upper 16 bits of a +-- 64-bit unsigned integer and then encodes the lower 48 bits as +-- hexadecimal, zero-padding the encoding to 12 digits. This uses +-- lowercase for the alphabetical digits. For example, this encodes the +-- number 1022 as @0000000003fe@. word48PaddedLowerHex :: Word64 -> Builder 12 -word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# (word64ToWord# w) +word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif -{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as -hexadecimal, zero-padding the encoding to 8 digits. This uses -uppercase for the alphabetical digits. --} +-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 8 digits. This uses +-- uppercase for the alphabetical digits. word32PaddedUpperHex :: Word32 -> Builder 8 word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# (C.word32ToWord# w) -{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as -hexadecimal, zero-padding the encoding to 8 digits. This uses -lowercase for the alphabetical digits. --} +-- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 8 digits. This uses +-- lowercase for the alphabetical digits. word32PaddedLowerHex :: Word32 -> Builder 8 word32PaddedLowerHex (W32# w) = word32PaddedLowerHex# (C.word32ToWord# w) -{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as -hexadecimal, zero-padding the encoding to 4 digits. This uses -uppercase for the alphabetical digits. - ->>> word16PaddedUpperHex 0xab0 -0AB0 --} +-- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 4 digits. This uses +-- uppercase for the alphabetical digits. +-- +-- >>> word16PaddedUpperHex 0xab0 +-- 0AB0 word16PaddedUpperHex :: Word16 -> Builder 4 word16PaddedUpperHex (W16# w) = word16PaddedUpperHex# (C.word16ToWord# w) -{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as -hexadecimal, zero-padding the encoding to 4 digits. This uses -lowercase for the alphabetical digits. - ->>> word16PaddedLowerHex 0xab0 -0ab0 --} +-- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 4 digits. This uses +-- lowercase for the alphabetical digits. +-- +-- >>> word16PaddedLowerHex 0xab0 +-- 0ab0 word16PaddedLowerHex :: Word16 -> Builder 4 word16PaddedLowerHex (W16# w) = word16PaddedLowerHex# (C.word16ToWord# w) -{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as -hexadecimal. No leading zeroes are displayed. Letters are presented -in lowercase. If the number is zero, a single zero digit is used. - ->>> word16LowerHex 0xab0 -ab0 --} +-- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +-- hexadecimal. No leading zeroes are displayed. Letters are presented +-- in lowercase. If the number is zero, a single zero digit is used. +-- +-- >>> word16LowerHex 0xab0 +-- ab0 word16LowerHex :: Word16 -> Builder 4 word16LowerHex (W16# w) = word16LowerHex# (C.word16ToWord# w) -{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as -hexadecimal. No leading zeroes are displayed. Letters are presented -in uppercase. If the number is zero, a single zero digit is used. - ->>> word16UpperHex 0xab0 -AB0 --} +-- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +-- hexadecimal. No leading zeroes are displayed. Letters are presented +-- in uppercase. If the number is zero, a single zero digit is used. +-- +-- >>> word16UpperHex 0xab0 +-- AB0 word16UpperHex :: Word16 -> Builder 4 word16UpperHex (W16# w) = word16UpperHex# (C.word16ToWord# w) -{- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as -hexadecimal. No leading zeroes are displayed. If the number is zero, -a single zero digit is used. --} +-- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as +-- hexadecimal. No leading zeroes are displayed. If the number is zero, +-- a single zero digit is used. word8LowerHex :: Word8 -> Builder 2 word8LowerHex (W8# w) = word8LowerHex# (C.word8ToWord# w) -{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as -hexadecimal, zero-padding the encoding to 2 digits. This uses -uppercase for the alphabetical digits. --} +-- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 2 digits. This uses +-- uppercase for the alphabetical digits. word8PaddedUpperHex :: Word8 -> Builder 2 word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# (C.word8ToWord# w) -{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as -hexadecimal, zero-padding the encoding to 2 digits. This uses -lowercase for the alphabetical digits. --} +-- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +-- hexadecimal, zero-padding the encoding to 2 digits. This uses +-- lowercase for the alphabetical digits. word8PaddedLowerHex :: Word8 -> Builder 2 word8PaddedLowerHex (W8# w) = word8PaddedLowerHex# (C.word8ToWord# w) -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedUpperHex# :: Word# -> Builder 16 -{-# NOINLINE word64PaddedUpperHex# #-} +{-# noinline word64PaddedUpperHex# #-} word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56)) @@ -597,13 +575,13 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word48PaddedLowerHex# :: Word# -> Builder 12 -{-# NOINLINE word48PaddedLowerHex# #-} +{-# noinline word48PaddedLowerHex# #-} word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 44)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 40)) @@ -618,13 +596,13 @@ word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 11) (toHexLower w) pure (off + 12) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedLowerHex# :: Word# -> Builder 16 -{-# NOINLINE word64PaddedLowerHex# #-} +{-# noinline word64PaddedLowerHex# #-} word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 56)) @@ -643,11 +621,11 @@ word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexLower (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# word32PaddedUpperHex# :: Word# -> Builder 8 -{-# NOINLINE word32PaddedUpperHex# #-} +{-# noinline word32PaddedUpperHex# #-} word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24)) @@ -658,11 +636,11 @@ word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# word32PaddedLowerHex# :: Word# -> Builder 8 -{-# NOINLINE word32PaddedLowerHex# #-} +{-# noinline word32PaddedLowerHex# #-} word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 24)) @@ -673,7 +651,7 @@ word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# -- Not sure if it is beneficial to inline this. We just let @@ -686,7 +664,7 @@ word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word16PaddedLowerHex# :: Word# -> Builder 4 @@ -696,7 +674,7 @@ word16PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word12PaddedLowerHex# :: Word# -> Builder 3 @@ -705,7 +683,7 @@ word12PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# word12PaddedUpperHex# :: Word# -> Builder 3 @@ -714,42 +692,42 @@ word12PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# -- Definitely want this to inline. It's maybe a dozen instructions total. word8PaddedUpperHex# :: Word# -> Builder 2 -{-# INLINE word8PaddedUpperHex# #-} +{-# inline word8PaddedUpperHex# #-} word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word8PaddedLowerHex# :: Word# -> Builder 2 -{-# INLINE word8PaddedLowerHex# #-} +{-# inline word8PaddedLowerHex# #-} word8PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word4PaddedLowerHex# :: Word# -> Builder 1 -{-# INLINE word4PaddedLowerHex# #-} +{-# inline word4PaddedLowerHex# #-} word4PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower w) pure (off + 1) - where + where w = W# w# word4PaddedUpperHex# :: Word# -> Builder 1 -{-# INLINE word4PaddedUpperHex# #-} +{-# inline word4PaddedUpperHex# #-} word4PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper w) pure (off + 1) - where + where w = W# w# word16UpperHex# :: Word# -> Builder 4 @@ -758,7 +736,7 @@ word16UpperHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedUpperHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedUpperHex# w#) | otherwise = word16PaddedUpperHex# w# - where + where w = W# w# word16LowerHex# :: Word# -> Builder 4 @@ -767,7 +745,7 @@ word16LowerHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedLowerHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedLowerHex# w#) | otherwise = word16PaddedLowerHex# w# - where + where w = W# w# -- Precondition: argument less than 256 @@ -775,18 +753,17 @@ word8LowerHex# :: Word# -> Builder 2 word8LowerHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedLowerHex# w#) | otherwise = weaken Lte.constant (word8PaddedLowerHex# w#) - where + where w = W# w# -{- | Encode a number less than 100 as a decimal number, zero-padding it to -two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and -73 is encoded as @73@. - -Precondition: Argument must be less than 100. Failure to satisfy this -precondition will not result in a segfault, but the resulting bytes are -undefined. The implemention uses a heuristic for division that is inaccurate -for large numbers. --} +-- | Encode a number less than 100 as a decimal number, zero-padding it to +-- two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and +-- 73 is encoded as @73@. +-- +-- Precondition: Argument must be less than 100. Failure to satisfy this +-- precondition will not result in a segfault, but the resulting bytes are +-- undefined. The implemention uses a heuristic for division that is inaccurate +-- for large numbers. wordPaddedDec2 :: Word -> Builder 2 wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do let d1 = approxDiv10 w @@ -795,201 +772,172 @@ wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (unsafeWordToWord8 (d2 + 48)) pure (off + 2) -{- | Encode a number less than 10000 as a decimal number, zero-padding it to -two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, -and 73 is encoded as @0073@. - -Precondition: Argument must be less than 10000. Failure to satisfy this -precondition will not result in a segfault, but the resulting bytes are -undefined. The implemention uses a heuristic for division that is inaccurate -for large numbers. --} +-- | Encode a number less than 10000 as a decimal number, zero-padding it to +-- two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, +-- and 73 is encoded as @0073@. +-- +-- Precondition: Argument must be less than 10000. Failure to satisfy this +-- precondition will not result in a segfault, but the resulting bytes are +-- undefined. The implemention uses a heuristic for division that is inaccurate +-- for large numbers. wordPaddedDec4 :: Word -> Builder 4 wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do putRem10 - ( putRem10 $ - putRem10 $ - putRem10 - (\_ _ _ -> pure ()) - ) - arr - (off + 3) - w + (putRem10 $ putRem10 $ putRem10 + (\_ _ _ -> pure ()) + ) arr (off + 3) w pure (off + 4) -wordPaddedDec3 :: Word -> Builder 3 -wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do - putRem10 - ( putRem10 $ - putRem10 - (\_ _ _ -> pure ()) - ) - arr - (off + 2) - w - pure (off + 3) - -{- | Encode a number less than 1e9 as a decimal number, zero-padding it to -nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as -@000000005@. - -Precondition: Argument must be less than 1e9. Failure to satisfy this -precondition will not result in a segfault, but the resulting bytes are -undefined. The implemention uses a heuristic for division that is inaccurate -for large numbers. --} +-- | Encode a number less than 1e9 as a decimal number, zero-padding it to +-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as +-- @000000005@. +-- +-- Precondition: Argument must be less than 1e9. Failure to satisfy this +-- precondition will not result in a segfault, but the resulting bytes are +-- undefined. The implemention uses a heuristic for division that is inaccurate +-- for large numbers. wordPaddedDec9 :: Word -> Builder 9 wordPaddedDec9 !w = Unsafe.construct $ \arr off -> do putRem10 - ( putRem10 $ - putRem10 $ - putRem10 $ - putRem10 $ - putRem10 $ - putRem10 $ - putRem10 $ - putRem10 - (\_ _ _ -> pure ()) - ) - arr - (off + 8) - w + (putRem10 $ putRem10 $ putRem10 $ putRem10 $ putRem10 $ + putRem10 $ putRem10 $ putRem10 + (\_ _ _ -> pure ()) + ) arr (off + 8) w pure (off + 9) putRem10 :: (MutableByteArray s -> Int -> Word -> ST s a) -> MutableByteArray s -> Int -> Word -> ST s a -{-# INLINE putRem10 #-} +{-# inline putRem10 #-} putRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) writeByteArray arr off (unsafeWordToWord8 (remainder + 48)) andThen arr (off - 1) quotient -{- | Encode an ASCII character. -Precondition: Input must be an ASCII character. This is not checked. --} +-- | Encode an ASCII character. +-- Precondition: Input must be an ASCII character. This is not checked. ascii :: Char -> Builder 1 ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c) - pure (I# (off +# 1#)) + pure (I# (off +# 1# )) -{- | Encode two ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode two ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii2 :: Char -> Char -> Builder 2 ascii2 (C# c0) (C# c1) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - pure (I# (off +# 2#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + pure (I# (off +# 2# )) -{- | Encode three ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode three ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii3 :: Char -> Char -> Char -> Builder 3 ascii3 (C# c0) (C# c1) (C# c2) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - pure (I# (off +# 3#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + pure (I# (off +# 3# )) -{- | Encode four ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode four ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii4 :: Char -> Char -> Char -> Char -> Builder 4 ascii4 (C# c0) (C# c1) (C# c2) (C# c3) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - primitive_ (writeCharArray# arr (off +# 3#) c3) - pure (I# (off +# 4#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + primitive_ (writeCharArray# arr (off +# 3# ) c3) + pure (I# (off +# 4# )) -{- | Encode five ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode five ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder 5 ascii5 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - primitive_ (writeCharArray# arr (off +# 3#) c3) - primitive_ (writeCharArray# arr (off +# 4#) c4) - pure (I# (off +# 5#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + primitive_ (writeCharArray# arr (off +# 3# ) c3) + primitive_ (writeCharArray# arr (off +# 4# ) c4) + pure (I# (off +# 5# )) -{- | Encode six ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode six ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder 6 ascii6 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - primitive_ (writeCharArray# arr (off +# 3#) c3) - primitive_ (writeCharArray# arr (off +# 4#) c4) - primitive_ (writeCharArray# arr (off +# 5#) c5) - pure (I# (off +# 6#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + primitive_ (writeCharArray# arr (off +# 3# ) c3) + primitive_ (writeCharArray# arr (off +# 4# ) c4) + primitive_ (writeCharArray# arr (off +# 5# ) c5) + pure (I# (off +# 6# )) -{- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 7 ascii7 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - primitive_ (writeCharArray# arr (off +# 3#) c3) - primitive_ (writeCharArray# arr (off +# 4#) c4) - primitive_ (writeCharArray# arr (off +# 5#) c5) - primitive_ (writeCharArray# arr (off +# 6#) c6) - pure (I# (off +# 7#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + primitive_ (writeCharArray# arr (off +# 3# ) c3) + primitive_ (writeCharArray# arr (off +# 4# ) c4) + primitive_ (writeCharArray# arr (off +# 5# ) c5) + primitive_ (writeCharArray# arr (off +# 6# ) c6) + pure (I# (off +# 7# )) -{- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. -This is not checked. --} +-- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. +-- This is not checked. ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 8 ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1#) c1) - primitive_ (writeCharArray# arr (off +# 2#) c2) - primitive_ (writeCharArray# arr (off +# 3#) c3) - primitive_ (writeCharArray# arr (off +# 4#) c4) - primitive_ (writeCharArray# arr (off +# 5#) c5) - primitive_ (writeCharArray# arr (off +# 6#) c6) - primitive_ (writeCharArray# arr (off +# 7#) c7) - pure (I# (off +# 8#)) + primitive_ (writeCharArray# arr (off +# 1# ) c1) + primitive_ (writeCharArray# arr (off +# 2# ) c2) + primitive_ (writeCharArray# arr (off +# 3# ) c3) + primitive_ (writeCharArray# arr (off +# 4# ) c4) + primitive_ (writeCharArray# arr (off +# 5# ) c5) + primitive_ (writeCharArray# arr (off +# 6# ) c6) + primitive_ (writeCharArray# arr (off +# 7# ) c7) + pure (I# (off +# 8# )) -- | Encode a machine-sized word with VLQ (also known as VByte, Varint, VInt). wordVlq :: Word -> Builder 10 -{-# INLINE wordVlq #-} +{-# inline wordVlq #-} wordVlq (W# w) = vlqCommon (W# w) -- | Encode a 32-bit word with VLQ (also known as VByte, Varint, VInt). word32Vlq :: Word32 -> Builder 5 -{-# INLINE word32Vlq #-} +{-# inline word32Vlq #-} word32Vlq (W32# w) = vlqCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with VLQ (also known as VByte, Varint, VInt). word64Vlq :: Word64 -> Builder 10 -{-# INLINE word64Vlq #-} -word64Vlq (W64# w) = vlqCommon (W# (word64ToWord# w)) +{-# inline word64Vlq #-} +word64Vlq (W64# w) = vlqCommon (W# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif + ) -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder 10 -{-# INLINE wordLEB128 #-} +{-# inline wordLEB128 #-} wordLEB128 (W# w) = lebCommon (W# w) --- | Encode a 32-bit word with LEB-128. -word16LEB128 :: Word16 -> Builder 3 -{-# INLINE word16LEB128 #-} -word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w)) - -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder 5 -{-# INLINE word32LEB128 #-} +{-# inline word32LEB128 #-} word32LEB128 (W32# w) = lebCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder 10 -{-# INLINE word64LEB128 #-} -word64LEB128 (W64# w) = lebCommon (W# (word64ToWord# w)) +{-# inline word64LEB128 #-} +word64LEB128 (W64# w) = lebCommon (W# +#if MIN_VERSION_base(4,17,0) + (word64ToWord# w) +#else + w +#endif + ) vlqCommon :: Word -> Builder n vlqCommon !w = case w of @@ -999,25 +947,23 @@ vlqCommon !w = case w of in vlqStep startIx w vlqStep :: - Int -> -- start index, must be in range [0,63] and 7 must divide it evenly - Word -> - Builder n + Int -- start index, must be in range [0,63] and 7 must divide it evenly + -> Word + -> Builder n vlqStep !ix !w | ix <= 0 = unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .&. 0b0111_1111)) - | otherwise = - unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) - (vlqStep (ix - 7) w) + | otherwise = unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) + (vlqStep (ix - 7) w) lebCommon :: Word -> Builder n lebCommon !w = case quotRem w 128 of - (q, r) -> case q of + (q,r) -> case q of 0 -> unsafeWord8 (unsafeWordToWord8 r) - _ -> - unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) - (lebCommon q) + _ -> unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) + (lebCommon q) -- | Encode a character as UTF-8. This only uses as much space as is required. char :: Char -> Builder 4 @@ -1026,62 +972,68 @@ char c writeByteArray arr off (unsafeWordToWord8 codepoint) pure (off + 1) | codepoint < 0x800 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint)) return (off + 2) | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do -- Codepoint U+FFFD - writeByteArray arr off (0xEF :: Word8) + writeByteArray arr off (0xEF :: Word8) writeByteArray arr (off + 1) (0xBF :: Word8) writeByteArray arr (off + 2) (0xBD :: Word8) return (off + 3) | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint)) return (off + 3) | otherwise = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint)) writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint)) return (off + 4) - where - codepoint :: Word - codepoint = fromIntegral (ord c) - -- precondition: codepoint is less than 0x800 - byteTwoOne :: Word -> Word - byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 + where + codepoint :: Word + codepoint = fromIntegral (ord c) - byteTwoTwo :: Word -> Word - byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 + -- precondition: codepoint is less than 0x800 + byteTwoOne :: Word -> Word + byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 - -- precondition: codepoint is less than 0x1000 - byteThreeOne :: Word -> Word - byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 + byteTwoTwo :: Word -> Word + byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 - byteThreeTwo :: Word -> Word - byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 + -- precondition: codepoint is less than 0x1000 + byteThreeOne :: Word -> Word + byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 - byteThreeThree :: Word -> Word - byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 + byteThreeTwo :: Word -> Word + byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - -- precondition: codepoint is less than 0x110000 - byteFourOne :: Word -> Word - byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 + byteThreeThree :: Word -> Word + byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 - byteFourTwo :: Word -> Word - byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 + -- precondition: codepoint is less than 0x110000 + byteFourOne :: Word -> Word + byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 - byteFourThree :: Word -> Word - byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 + byteFourTwo :: Word -> Word + byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 - byteFourFour :: Word -> Word - byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 + byteFourThree :: Word -> Word + byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 + + byteFourFour :: Word -> Word + byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 int64BE :: Int64 -> Builder 8 -int64BE (I64# i) = word64BE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) +int64BE (I64# i) = word64BE (W64# ( +#if MIN_VERSION_base(4,17,0) + wordToWord64# (int2Word# (int64ToInt# i)))) +#else + int2Word# i)) +#endif int32BE :: Int32 -> Builder 4 int32BE (I32# i) = word32BE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1090,7 +1042,13 @@ int16BE :: Int16 -> Builder 2 int16BE (I16# i) = word16BE (W16# (C.wordToWord16# (int2Word# (C.int16ToInt# i)))) int64LE :: Int64 -> Builder 8 -int64LE (I64# i) = word64LE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) +int64LE (I64# i) = word64LE (W64# ( +#if MIN_VERSION_base(4,17,0) + wordToWord64# (int2Word# (int64ToInt# i)))) +#else + int2Word# i)) +#endif + int32LE :: Int32 -> Builder 4 int32LE (I32# i) = word32LE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1110,9 +1068,8 @@ word256LE (Word256 hi mhi mlo lo) = word64LE lo `append` word64LE mlo `append` w word256BE :: Word256 -> Builder 32 word256BE (Word256 hi mhi mlo lo) = word64BE hi `append` word64BE mhi `append` word64BE mlo `append` word64BE lo -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -word in a little-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- word in a little-endian fashion. word64LE :: Word64 -> Builder 8 word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) @@ -1122,15 +1079,14 @@ word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off) (fromIntegral @Word64 @Word8 w) + writeByteArray arr (off ) (fromIntegral @Word64 @Word8 w) pure (off + 8) -{- | Requires exactly 8 bytes. Dump the octets of a 64-bit -word in a big-endian fashion. --} +-- | Requires exactly 8 bytes. Dump the octets of a 64-bit +-- word in a big-endian fashion. word64BE :: Word64 -> Builder 8 word64BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) + writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32)) @@ -1140,43 +1096,39 @@ word64BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w) pure (off + 8) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -word in a little-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- word in a little-endian fashion. word32LE :: Word32 -> Builder 4 word32LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off) (fromIntegral @Word32 @Word8 w) + writeByteArray arr (off ) (fromIntegral @Word32 @Word8 w) pure (off + 4) -{- | Requires exactly 4 bytes. Dump the octets of a 32-bit -word in a big-endian fashion. --} +-- | Requires exactly 4 bytes. Dump the octets of a 32-bit +-- word in a big-endian fashion. word32BE :: Word32 -> Builder 4 word32BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) + writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w) pure (off + 4) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -word in a little-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- word in a little-endian fashion. word16LE :: Word16 -> Builder 2 word16LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off) (fromIntegral @Word16 @Word8 w) + writeByteArray arr (off ) (fromIntegral @Word16 @Word8 w) pure (off + 2) -{- | Requires exactly 2 bytes. Dump the octets of a 16-bit -word in a big-endian fashion. --} +-- | Requires exactly 2 bytes. Dump the octets of a 16-bit +-- word in a big-endian fashion. word16BE :: Word16 -> Builder 2 word16BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) + writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w) pure (off + 2) @@ -1193,18 +1145,16 @@ unsafeWord8 w = Unsafe.construct $ \arr off -> do -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# INLINE reverseBytes #-} -reverseBytes arr begin end = go begin end - where - go ixA ixB = - if ixA < ixB - then do - a :: Word8 <- readByteArray arr ixA - b :: Word8 <- readByteArray arr ixB - writeByteArray arr ixA b - writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# inline reverseBytes #-} +reverseBytes arr begin end = go begin end where + go ixA ixB = if ixA < ixB + then do + a :: Word8 <- readByteArray arr ixA + b :: Word8 <- readByteArray arr ixB + writeByteArray arr ixA b + writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () c2w :: Char -> Word8 c2w = fromIntegral . ord @@ -1218,13 +1168,8 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) = -- inaccurate. This is very visible when encoding a number like 2.25, which -- is perfectly represented as an IEEE 754 floating point number but is goofed -- up by this function. -doubleDec# :: - forall s. - Double# -> - MutableByteArray# s -> - Int# -> - State# s -> - (# State# s, Int# #) +doubleDec# :: forall s. + Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) doubleDec# d# marr# off# s0 = case unsafeIOToST (c_paste_double marr# off# d#) of ST f -> case f s0 of @@ -1238,6 +1183,5 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) -foreign import ccall unsafe "bytebuild_paste_double" - c_paste_double :: - MutableByteArray# s -> Int# -> Double# -> IO Int +foreign import ccall unsafe "bytebuild_paste_double" c_paste_double :: + MutableByteArray# s -> Int# -> Double# -> IO Int diff --git a/src/Data/Bytes/Builder/Bounded/Class.hs b/src/Data/Bytes/Builder/Bounded/Class.hs index 29c16e5..9a630d1 100644 --- a/src/Data/Bytes/Builder/Bounded/Class.hs +++ b/src/Data/Bytes/Builder/Bounded/Class.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE TypeFamilies #-} +{-# language DataKinds #-} +{-# language TypeFamilies #-} module Data.Bytes.Builder.Bounded.Class - ( ToBoundedBuilder (..) + ( ToBoundedBuilder(..) ) where import Data.Int @@ -11,15 +11,14 @@ import Data.Word import qualified Data.Bytes.Builder.Bounded as Bounded import qualified GHC.TypeNats as GHC -{- | Variant of To that can be encoded as a builder. Human-readable encodings -are used when possible. For example, numbers are encoded an ascii-encoded -decimal characters. UTF-8 is preferred for textual types. For types -that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes -are preserved. - -The goal of this typeclass is to reduce the size of builders produced -by quasiquotation. --} +-- | Variant of To that can be encoded as a builder. Human-readable encodings +-- are used when possible. For example, numbers are encoded an ascii-encoded +-- decimal characters. UTF-8 is preferred for textual types. For types +-- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +-- are preserved. +-- +-- The goal of this typeclass is to reduce the size of builders produced +-- by quasiquotation. class ToBoundedBuilder a where type BoundedBuilderLength a :: GHC.Nat toBuilder :: a -> Bounded.Builder (BoundedBuilderLength a) diff --git a/src/Data/Bytes/Builder/Bounded/Unsafe.hs b/src/Data/Bytes/Builder/Bounded/Unsafe.hs index c983e04..3c812bc 100644 --- a/src/Data/Bytes/Builder/Bounded/Unsafe.hs +++ b/src/Data/Bytes/Builder/Bounded/Unsafe.hs @@ -1,66 +1,62 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTSyntax #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE UnboxedTuples #-} +{-# language DataKinds #-} +{-# language GADTSyntax #-} +{-# language KindSignatures #-} +{-# language MagicHash #-} +{-# language RankNTypes #-} +{-# language ScopedTypeVariables #-} +{-# language UnboxedTuples #-} module Data.Bytes.Builder.Bounded.Unsafe ( -- * Types - Builder (..) - + Builder(..) -- * Construct , construct - -- * Run , pasteST , pasteIO ) where import Data.Kind (Type) -import Data.Primitive (MutableByteArray (..)) -import GHC.Exts (Int (I#), Int#, MutableByteArray#, RealWorld, State#) +import Data.Primitive (MutableByteArray(..)) +import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#) import GHC.IO (stToIO) -import GHC.ST (ST (ST)) +import GHC.ST (ST(ST)) import GHC.TypeLits (Nat) -{- | A builder parameterized by the maximum number of bytes it uses -when executed. --} +-- | A builder parameterized by the maximum number of bytes it uses +-- when executed. newtype Builder :: Nat -> Type where - Builder :: - -- | This function takes a buffer, an offset, and a number of remaining bytes. - -- It returns the new offset. - (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> - Builder n + Builder :: + (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) + -- ^ This function takes a buffer, an offset, and a number of remaining bytes. + -- It returns the new offset. + -> Builder n + -{- | Constructor for 'Builder' that works on a function with lifted -arguments instead of unlifted ones. This is just as unsafe as the -actual constructor. --} +-- | Constructor for 'Builder' that works on a function with lifted +-- arguments instead of unlifted ones. This is just as unsafe as the +-- actual constructor. construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n -{-# INLINE construct #-} -construct f = Builder $ - \arr off s0 -> +{-# inline construct #-} +construct f = Builder + $ \arr off s0 -> case unST (f (MutableByteArray arr) (I# off)) s0 of (# s1, (I# n) #) -> (# s1, n #) -{- | This function does not enforce the known upper bound on the -size. It is up to the user to do this. --} +-- | This function does not enforce the known upper bound on the +-- size. It is up to the user to do this. pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int -{-# INLINE pasteST #-} +{-# inline pasteST #-} pasteST (Builder f) (MutableByteArray arr) (I# off) = ST $ \s0 -> case f arr off s0 of (# s1, r #) -> (# s1, (I# r) #) -{- | This function does not enforce the known upper bound on the -size. It is up to the user to do this. --} +-- | This function does not enforce the known upper bound on the +-- size. It is up to the user to do this. pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int -{-# INLINE pasteIO #-} +{-# inline pasteIO #-} pasteIO b m off = stToIO (pasteST b m off) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f + diff --git a/src/Data/Bytes/Builder/Class.hs b/src/Data/Bytes/Builder/Class.hs index fcaf303..bc9440c 100644 --- a/src/Data/Bytes/Builder/Class.hs +++ b/src/Data/Bytes/Builder/Class.hs @@ -1,12 +1,13 @@ {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeSynonymInstances #-} module Data.Bytes.Builder.Class - ( ToBuilder (..) + ( ToBuilder(..) ) where -import Data.ByteString.Short (ShortByteString) import Data.Bytes (Bytes) import Data.Bytes.Builder (Builder) +import Data.ByteString.Short (ShortByteString) import Data.Int import Data.Primitive.ByteArray (ByteArray) import Data.Text.Short (ShortText) @@ -14,15 +15,14 @@ import Data.Word import qualified Data.Bytes.Builder as Builder -{- | Types that can be encoded as a builder. Human-readable encodings -are used when possible. For example, numbers are encoded an ascii-encoded -decimal characters. UTF-8 is preferred for textual types. For types -that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes -are preserved. - -The goal of this typeclass is to reduce the size of builders produced -by quasiquotation. --} +-- | Types that can be encoded as a builder. Human-readable encodings +-- are used when possible. For example, numbers are encoded an ascii-encoded +-- decimal characters. UTF-8 is preferred for textual types. For types +-- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +-- are preserved. +-- +-- The goal of this typeclass is to reduce the size of builders produced +-- by quasiquotation. class ToBuilder a where toBuilder :: a -> Builder diff --git a/src/Data/Bytes/Builder/Template.hs b/src/Data/Bytes/Builder/Template.hs index f2b95dd..1fd775c 100644 --- a/src/Data/Bytes/Builder/Template.hs +++ b/src/Data/Bytes/Builder/Template.hs @@ -9,53 +9,50 @@ module Data.Bytes.Builder.Template import Control.Monad (when) import Data.Bytes.Builder.Class (toBuilder) -import GHC.Ptr (Ptr (Ptr)) +import GHC.Ptr (Ptr(Ptr)) import Language.Haskell.Meta.Parse (parseExp) -import Language.Haskell.TH (Exp, Q) -import Language.Haskell.TH.Lib (integerL, litE, stringPrimL) -import Language.Haskell.TH.Quote (QuasiQuoter (..)) +import Language.Haskell.TH (Q,Exp) +import Language.Haskell.TH.Lib (integerL,stringPrimL,litE) +import Language.Haskell.TH.Quote (QuasiQuoter(..)) -import qualified Data.ByteString.Short as SBS import qualified Data.Bytes.Builder as Builder +import qualified Data.ByteString.Short as SBS import qualified Data.Text.Short as TS import qualified Language.Haskell.TH as TH -{- | A quasiquoter for builders. Haskell expressions are interpolated -with backticks, and the @ToBuilder@ class is used to convert them -to builders. Several common escape sequences for whitespace and -control characters are recongized. Consider the following expression, -where the binding @partition@ has type @Word32@: - -> [templ|[WARN] Partition `partition` has invalid data.\n|] - -This expression has type @Builder@ and expands to: - -> Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> -> Builder.toBuilder partition <> -> Builder.cstringLen (Ptr " has invalid data.\n"#, 19) - -The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this -would result in the following if @partition@ was 42 (with a newline -character at the end): - -> [WARN] Partition 42 has invalid data. - -In the future, a more sophisticated @bbldr@ variant will be added -that will support expressions where the maximum length of the entire -builder can be computed at compile time. --} +-- | A quasiquoter for builders. Haskell expressions are interpolated +-- with backticks, and the @ToBuilder@ class is used to convert them +-- to builders. Several common escape sequences for whitespace and +-- control characters are recongized. Consider the following expression, +-- where the binding @partition@ has type @Word32@: +-- +-- > [templ|[WARN] Partition `partition` has invalid data.\n|] +-- +-- This expression has type @Builder@ and expands to: +-- +-- > Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> +-- > Builder.toBuilder partition <> +-- > Builder.cstringLen (Ptr " has invalid data.\n"#, 19) +-- +-- The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this +-- would result in the following if @partition@ was 42 (with a newline +-- character at the end): +-- +-- > [WARN] Partition 42 has invalid data. +-- +-- In the future, a more sophisticated @bbldr@ variant will be added +-- that will support expressions where the maximum length of the entire +-- builder can be computed at compile time. bldr :: QuasiQuoter -bldr = - QuasiQuoter - { quoteExp = templExp - , quotePat = notHandled "patterns" - , quoteType = notHandled "types" - , quoteDec = notHandled "declarations" - } - where - notHandled things _ = - fail $ - things ++ "are not handled by the byte template quasiquoter" +bldr = QuasiQuoter + { quoteExp = templExp + , quotePat = notHandled "patterns" + , quoteType = notHandled "types" + , quoteDec = notHandled "declarations" + } + where + notHandled things _ = fail $ + things ++ "are not handled by the byte template quasiquoter" templExp :: String -> Q Exp templExp inp = do @@ -65,7 +62,7 @@ templExp inp = do Right [] -> fail "empty template" Right v -> pure v let expParts = compile <$> rawParts - foldl1 (\e1 e2 -> [|$e1 <> $e2|]) expParts + foldl1 (\e1 e2 -> [| $e1 <> $e2 |]) expParts checkOverloadedStrings :: Q () checkOverloadedStrings = do @@ -90,40 +87,40 @@ compile (Splice str) = case parseExp str of parse :: String -> Either String Template parse = partsLoop - where + where partsLoop "" = do pure [] - partsLoop ('`' : inp) = do + partsLoop ('`':inp) = do (!spl, !rest) <- spliceLoop inp - (Splice spl :) <$> partsLoop rest + (Splice spl:) <$> partsLoop rest partsLoop inp = do (!lit, !rest) <- litLoop "" inp - (Literal lit :) <$> partsLoop rest + (Literal lit:) <$> partsLoop rest litLoop :: String -> String -> Either String (String, String) litLoop !acc rest@"" = pure (reverse acc, rest) - litLoop !acc rest@('`' : _) = pure (reverse acc, rest) - litLoop !acc ('\\' : next) = do + litLoop !acc rest@('`':_) = pure (reverse acc, rest) + litLoop !acc ('\\':next) = do (c, rest) <- parseEscape next - litLoop (c : acc) rest - litLoop !acc (c : rest) = litLoop (c : acc) rest + litLoop (c:acc) rest + litLoop !acc (c:rest) = litLoop (c:acc) rest spliceLoop :: String -> Either String (String, String) spliceLoop inp = case break (== '`') inp of ([], _) -> Left "internal error" - (hs, '`' : rest) -> pure (hs, rest) - (_, _ : _) -> Left "internal error" + (hs, '`':rest) -> pure (hs, rest) + (_, _:_) -> Left "internal error" (_, []) -> Left "unterminated interpolation" parseEscape :: String -> Either String (Char, String) parseEscape "" = Left "incomplete escape" - parseEscape ('\\' : rest) = pure ('\\', rest) - parseEscape ('`' : rest) = pure ('`', rest) - parseEscape ('\'' : rest) = pure ('\'', rest) - parseEscape ('\"' : rest) = pure ('\"', rest) - parseEscape ('0' : rest) = pure ('\0', rest) - parseEscape ('a' : rest) = pure ('\a', rest) - parseEscape ('b' : rest) = pure ('\b', rest) - parseEscape ('f' : rest) = pure ('\f', rest) - parseEscape ('n' : rest) = pure ('\n', rest) - parseEscape ('r' : rest) = pure ('\r', rest) - parseEscape ('t' : rest) = pure ('\t', rest) - parseEscape ('v' : rest) = pure ('\v', rest) - parseEscape (c : _) = Left $ "unrecognized escape: \\" ++ [c] + parseEscape ('\\':rest) = pure ('\\', rest) + parseEscape ('`':rest) = pure ('`', rest) + parseEscape ('\'':rest) = pure ('\'', rest) + parseEscape ('\"':rest) = pure ('\"', rest) + parseEscape ('0':rest) = pure ('\0', rest) + parseEscape ('a':rest) = pure ('\a', rest) + parseEscape ('b':rest) = pure ('\b', rest) + parseEscape ('f':rest) = pure ('\f', rest) + parseEscape ('n':rest) = pure ('\n', rest) + parseEscape ('r':rest) = pure ('\r', rest) + parseEscape ('t':rest) = pure ('\t', rest) + parseEscape ('v':rest) = pure ('\v', rest) + parseEscape (c:_) = Left $ "unrecognized escape: \\" ++ [c] diff --git a/src/Data/Bytes/Builder/Unsafe.hs b/src/Data/Bytes/Builder/Unsafe.hs index bcfbdf8..ab3a29d 100644 --- a/src/Data/Bytes/Builder/Unsafe.hs +++ b/src/Data/Bytes/Builder/Unsafe.hs @@ -1,60 +1,55 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE UnboxedTuples #-} +{-# language BangPatterns #-} +{-# language DuplicateRecordFields #-} +{-# language LambdaCase #-} +{-# language MagicHash #-} +{-# language RankNTypes #-} +{-# language ScopedTypeVariables #-} +{-# language UnboxedTuples #-} module Data.Bytes.Builder.Unsafe ( -- * Types - Builder (..) - , BuilderState (..) - , Commits (..) - + Builder(..) + , BuilderState(..) + , Commits(..) -- * Execution , pasteST , pasteIO - -- * Construction , fromEffect - -- * Builder State , newBuilderState , closeBuilderState - -- * Finalization , reverseCommitsOntoChunks , commitsOntoChunks , copyReverseCommits , addCommitsLength - -- * Commit Distance , commitDistance , commitDistance1 - -- * Safe Functions - -- | These functions are actually completely safe, but they are defined -- here because they are used by typeclass instances. Import them from -- @Data.Bytes.Builder@ instead. , stringUtf8 , cstring - -- * Pasting with Preconditions , pasteUtf8TextJson# ) where import Control.Monad.Primitive (primitive_) -import Data.Bytes.Chunks (Chunks (ChunksCons)) -import Data.Bytes.Types (Bytes (Bytes)) +import Data.Bytes.Chunks (Chunks(ChunksCons)) +import Data.Bytes.Types (Bytes(Bytes)) import Data.Char (ord) -import Data.Primitive (ByteArray (..), MutableByteArray (..)) +import Data.Primitive (MutableByteArray(..),ByteArray(..)) import Data.Word (Word8) import Foreign.C.String (CString) -import GHC.Base (unpackCString#, unpackCStringUtf8#) -import GHC.Exts (Addr#, ByteArray#, Char (C#), Int (I#), Int#, IsString, MutableByteArray#, Ptr (Ptr), RealWorld, State#, (+#), (-#), (>#), (>=#)) +import GHC.Base (unpackCString#,unpackCStringUtf8#) +import GHC.Exts ((-#),(+#),(>#),(>=#),Char(C#)) +import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr)) +import GHC.Exts (RealWorld,IsString,Int#,State#) import GHC.IO (stToIO) -import GHC.ST (ST (ST)) +import GHC.ST (ST(ST)) import qualified Compat as C import qualified Data.Bytes.Builder.Bounded as Bounded @@ -63,52 +58,46 @@ import qualified Data.Primitive as PM import qualified GHC.Exts as Exts import qualified Op -{- | An unmaterialized sequence of bytes that may be pasted -into a mutable byte array. --} +-- | An unmaterialized sequence of bytes that may be pasted +-- into a mutable byte array. newtype Builder - = Builder - ( forall s. - MutableByteArray# s -> -- buffer we are currently writing to - Int# -> -- offset into the current buffer - Int# -> -- number of bytes remaining in the current buffer - Commits s -> -- buffers and immutable byte slices that we have already committed - State# s -> - (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things - ) + = Builder (forall s. + MutableByteArray# s -> -- buffer we are currently writing to + Int# -> -- offset into the current buffer + Int# -> -- number of bytes remaining in the current buffer + Commits s -> -- buffers and immutable byte slices that we have already committed + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things + ) -{- | A list of committed chunks along with the chunk currently being -written to. This is kind of like a non-empty variant of 'Commmits' -but with the additional invariant that the head chunk is a mutable -byte array. --} -data BuilderState s - = BuilderState - (MutableByteArray# s) -- buffer we are currently writing to - Int# -- offset into the current buffer - Int# -- number of bytes remaining in the current buffer - !(Commits s) -- buffers and immutable byte slices that are already committed +-- | A list of committed chunks along with the chunk currently being +-- written to. This is kind of like a non-empty variant of 'Commmits' +-- but with the additional invariant that the head chunk is a mutable +-- byte array. +data BuilderState s = BuilderState + (MutableByteArray# s) -- buffer we are currently writing to + Int# -- offset into the current buffer + Int# -- number of bytes remaining in the current buffer + !(Commits s) -- buffers and immutable byte slices that are already committed -- | Create an empty 'BuilderState' with a buffer of the given size. newBuilderState :: Int -> ST s (BuilderState s) -{-# INLINE newBuilderState #-} -newBuilderState n@(I# n#) = do +{-# inline newBuilderState #-} +newBuilderState n@(I# n# ) = do MutableByteArray buf <- PM.newByteArray n pure (BuilderState buf 0# n# Initial) -{- | Push the active chunk onto the top of the commits. -The @BuilderState@ argument must not be reused after being passed -to this function. That is, its use must be affine. --} +-- | Push the active chunk onto the top of the commits. +-- The @BuilderState@ argument must not be reused after being passed +-- to this function. That is, its use must be affine. closeBuilderState :: BuilderState s -> Commits s closeBuilderState (BuilderState dst off _ cmts) = Mutable dst off cmts -{- | Run a builder, performing an in-place update on the state. -The @BuilderState@ argument must not be reused after being passed -to this function. That is, its use must be affine. --} +-- | Run a builder, performing an in-place update on the state. +-- The @BuilderState@ argument must not be reused after being passed +-- to this function. That is, its use must be affine. pasteST :: Builder -> BuilderState s -> ST s (BuilderState s) -{-# INLINE pasteST #-} +{-# inline pasteST #-} pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> case f buf off len cmts s0 of (# s1, buf1, off1, len1, cmts1 #) -> @@ -116,54 +105,48 @@ pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> -- | Variant of 'pasteST' that runs in 'IO'. pasteIO :: Builder -> BuilderState RealWorld -> IO (BuilderState RealWorld) -{-# INLINE pasteIO #-} +{-# inline pasteIO #-} pasteIO b st = stToIO (pasteST b st) instance IsString Builder where - {-# INLINE fromString #-} + {-# inline fromString #-} fromString = stringUtf8 instance Semigroup Builder where - {-# INLINE (<>) #-} + {-# inline (<>) #-} Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of (# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1 instance Monoid Builder where - {-# INLINE mempty #-} + {-# inline mempty #-} mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #) data Commits s = Mutable - -- | Mutable buffer, start index implicitly zero (MutableByteArray# s) - -- | Length (may be smaller than actual length) - Int# + -- ^ Mutable buffer, start index implicitly zero + Int# -- ^ Length (may be smaller than actual length) !(Commits s) | Immutable - -- | Immutable chunk - ByteArray# - -- | Offset into chunk, not necessarily zero - Int# - -- | Length (may be smaller than actual length) - Int# + ByteArray# -- ^ Immutable chunk + Int# -- ^ Offset into chunk, not necessarily zero + Int# -- ^ Length (may be smaller than actual length) !(Commits s) | Initial -{- | Add the total number of bytes in the commits to first -argument. --} +-- | Add the total number of bytes in the commits to first +-- argument. addCommitsLength :: Int -> Commits s -> Int addCommitsLength !acc Initial = acc addCommitsLength !acc (Immutable _ _ x cs) = addCommitsLength (acc + I# x) cs addCommitsLength !acc (Mutable _ x cs) = addCommitsLength (acc + I# x) cs -{- | Cons the chunks from a list of @Commits@ onto an initial -@Chunks@ list (this argument is often @ChunksNil@). This reverses -the order of the chunks, which is desirable since builders assemble -@Commits@ with the chunks backwards. This performs an in-place shrink -and freezes any mutable byte arrays it encounters. Consequently, -these must not be reused. --} +-- | Cons the chunks from a list of @Commits@ onto an initial +-- @Chunks@ list (this argument is often @ChunksNil@). This reverses +-- the order of the chunks, which is desirable since builders assemble +-- @Commits@ with the chunks backwards. This performs an in-place shrink +-- and freezes any mutable byte arrays it encounters. Consequently, +-- these must not be reused. reverseCommitsOntoChunks :: Chunks -> Commits s -> ST s Chunks reverseCommitsOntoChunks !xs Initial = pure xs reverseCommitsOntoChunks !xs (Immutable arr off len cs) = @@ -176,18 +159,17 @@ reverseCommitsOntoChunks !xs (Mutable buf len cs) = case len of arr <- PM.unsafeFreezeByteArray (MutableByteArray buf) reverseCommitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs -{- | Variant of 'reverseCommitsOntoChunks' that does not reverse -the order of the commits. Since commits are built backwards by -consing, this means that the chunks appended to the front will -be backwards. Within each chunk, however, the bytes will be in -the correct order. - -Unlike 'reverseCommitsOntoChunks', this function is not tail -recursive. --} +-- | Variant of 'reverseCommitsOntoChunks' that does not reverse +-- the order of the commits. Since commits are built backwards by +-- consing, this means that the chunks appended to the front will +-- be backwards. Within each chunk, however, the bytes will be in +-- the correct order. +-- +-- Unlike 'reverseCommitsOntoChunks', this function is not tail +-- recursive. commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks commitsOntoChunks !xs0 cs0 = go cs0 - where + where go Initial = pure xs0 go (Immutable arr off len cs) = do xs <- go cs @@ -201,65 +183,53 @@ commitsOntoChunks !xs0 cs0 = go cs0 xs <- go cs pure $! ChunksCons (Bytes arr 0 (I# len)) xs -{- | Copy the contents of the chunks into a mutable array, reversing -the order of the chunks. -Precondition: The destination must have enough space to house the -contents. This is not checked. --} +-- | Copy the contents of the chunks into a mutable array, reversing +-- the order of the chunks. +-- Precondition: The destination must have enough space to house the +-- contents. This is not checked. copyReverseCommits :: - -- | Destination - MutableByteArray s -> - -- | Destination range successor - Int -> - -- | Source - Commits s -> - ST s Int -{-# INLINE copyReverseCommits #-} -copyReverseCommits (MutableByteArray dst) (I# off) cs = - ST - ( \s0 -> case copyReverseCommits# dst off cs s0 of - (# s1, nextOff #) -> (# s1, I# nextOff #) - ) + MutableByteArray s -- ^ Destination + -> Int -- ^ Destination range successor + -> Commits s -- ^ Source + -> ST s Int +{-# inline copyReverseCommits #-} +copyReverseCommits (MutableByteArray dst) (I# off) cs = ST + (\s0 -> case copyReverseCommits# dst off cs s0 of + (# s1, nextOff #) -> (# s1, I# nextOff #) + ) copyReverseCommits# :: - MutableByteArray# s -> - Int# -> - Commits s -> - State# s -> - (# State# s, Int# #) + MutableByteArray# s + -> Int# + -> Commits s + -> State# s + -> (# State# s, Int# #) copyReverseCommits# _ off Initial s0 = (# s0, off #) copyReverseCommits# marr prevOff (Mutable arr sz cs) s0 = - let !off = prevOff -# sz - in case Op.copyMutableByteArray# arr 0# marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz in + case Op.copyMutableByteArray# arr 0# marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 copyReverseCommits# marr prevOff (Immutable arr soff sz cs) s0 = - let !off = prevOff -# sz - in case Op.copyByteArray# arr soff marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz in + case Op.copyByteArray# arr soff marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 -{- | Create a builder from a cons-list of 'Char'. These -must be UTF-8 encoded. --} +-- | Create a builder from a cons-list of 'Char'. These +-- must be UTF-8 encoded. stringUtf8 :: String -> Builder -{-# INLINE stringUtf8 #-} +{-# inline stringUtf8 #-} stringUtf8 cs = Builder (goString cs) -{- | Create a builder from a @NUL@-terminated 'CString'. This ignores any -textual encoding, copying bytes until @NUL@ is reached. --} +-- | Create a builder from a @NUL@-terminated 'CString'. This ignores any +-- textual encoding, copying bytes until @NUL@ is reached. cstring :: CString -> Builder -{-# INLINE cstring #-} +{-# inline cstring #-} cstring (Ptr cs) = Builder (goCString cs) -goString :: - String -> - MutableByteArray# s -> - Int# -> - Int# -> - Commits s -> - State# s -> - (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -{-# NOINLINE goString #-} +goString :: String + -> MutableByteArray# s -> Int# -> Int# -> Commits s + -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) +{-# noinline goString #-} goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #) goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of 1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of @@ -275,53 +245,39 @@ goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of -- used Modified UTF-8. {-# RULES "Builder stringUtf8/cstring" forall s a b c d e. - goString (unpackCString# s) a b c d e = - goCString s a b c d e + goString (unpackCString# s) a b c d e = goCString s a b c d e "Builder stringUtf8/cstring-utf8" forall s a b c d e. - goString (unpackCStringUtf8# s) a b c d e = - goCString s a b c d e - #-} + goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e +#-} -goCString :: - Addr# -> - MutableByteArray# s -> - Int# -> - Int# -> - Commits s -> - State# s -> - (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) +goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s + -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) goCString addr buf0 off0 len0 cs0 s0 = case C.word8ToWord# (Exts.indexWord8OffAddr# addr 0#) of 0## -> (# s0, buf0, off0, len0, cs0 #) w -> case len0 of 0# -> case Exts.newByteArray# 4080# s0 of (# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# (C.wordToWord8# w) s1 of - s2 -> - goCString - (Exts.plusAddr# addr 1#) - buf1 - 1# - (4080# -# 1#) - (Mutable buf0 off0 cs0) - s2 + s2 -> goCString + (Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# ) + (Mutable buf0 off0 cs0) + s2 _ -> case Exts.writeWord8Array# buf0 off0 (C.wordToWord8# w) s0 of - s1 -> goCString (Exts.plusAddr# addr 1#) buf0 (off0 +# 1#) (len0 -# 1#) cs0 s1 + s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1 fromEffect :: - -- | Maximum number of bytes the paste function needs - Int -> - -- | Paste function. Takes a byte array and an offset and returns - -- the new offset and having pasted into the buffer. - (forall s. MutableByteArray s -> Int -> ST s Int) -> - Builder -{-# INLINE fromEffect #-} + Int -- ^ Maximum number of bytes the paste function needs + -> (forall s. MutableByteArray s -> Int -> ST s Int) + -- ^ Paste function. Takes a byte array and an offset and returns + -- the new offset and having pasted into the buffer. + -> Builder +{-# inline fromEffect #-} fromEffect (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> - let !(I# lenX) = max 4080 (I# req) - in case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> let !(I# lenX) = max 4080 (I# req) in + case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -332,26 +288,24 @@ shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray (MutableByteArray arr) (I# sz) = primitive_ (Exts.shrinkMutableByteArray# arr sz) -{- | Variant of commitDistance where you get to supply a -head of the commit list that has not yet been committed. --} +-- | Variant of commitDistance where you get to supply a +-- head of the commit list that has not yet been committed. commitDistance1 :: - MutableByteArray# s -> -- target - Int# -> -- offset into target - MutableByteArray# s -> -- head of array - Int# -> -- offset into head of array - Commits s -> - Int# + MutableByteArray# s -- target + -> Int# -- offset into target + -> MutableByteArray# s -- head of array + -> Int# -- offset into head of array + -> Commits s + -> Int# commitDistance1 target offTarget buf0 offBuf cs = case Exts.sameMutableByteArray# target buf0 of 1# -> offBuf -# offTarget _ -> commitDistance target offBuf cs -# offTarget -{- | Compute the number of bytes between the last byte and the offset -specified in a chunk. Precondition: the chunk must exist in the -list of committed chunks. This relies on mutable byte arrays having -identity (e.g. it uses @sameMutableByteArray#@). --} +-- | Compute the number of bytes between the last byte and the offset +-- specified in a chunk. Precondition: the chunk must exist in the +-- list of committed chunks. This relies on mutable byte arrays having +-- identity (e.g. it uses @sameMutableByteArray#@). commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int# commitDistance !_ !_ Initial = errorWithoutStackTrace "chunkDistance: chunk not found" commitDistance target !n (Immutable _ _ len cs) = @@ -361,59 +315,46 @@ commitDistance target !n (Mutable buf len cs) = 1# -> n +# len _ -> commitDistance target (n +# len) cs -{- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. -This escapes all characters with code points below @0x20@. - -* Precondition: The slice of the byte argument is UTF-8 encoded text. -* Precondition: There is enough space in the buffer for the result - to be written to. A simple way to ensure enough space is to allocate - @6N + 2@ bytes, where N is the length of the argument. However, the - caller may use clever heuristics to find a lower upper bound. -* Result: The next offset in the destination buffer --} +-- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. +-- This escapes all characters with code points below @0x20@. +-- +-- * Precondition: The slice of the byte argument is UTF-8 encoded text. +-- * Precondition: There is enough space in the buffer for the result +-- to be written to. A simple way to ensure enough space is to allocate +-- @3N + 2@ bytes, where N is the length of the argument. However, the +-- caller may use clever heuristics to find a lower upper bound. +-- * Result: The next offset in the destination buffer pasteUtf8TextJson# :: - -- | source - ByteArray# -> - -- | source offset - Int# -> - -- | source length - Int# -> - -- | destination buffer - MutableByteArray# s -> - -- | offset into destination buffer - Int# -> - -- | state token - State# s -> - (# State# s, Int# #) -- returns next destination offset -{-# NOINLINE pasteUtf8TextJson# #-} + ByteArray# -- ^ source + -> Int# -- ^ source offset + -> Int# -- ^ source length + -> MutableByteArray# s -- ^ destination buffer + -> Int# -- ^ offset into destination buffer + -> State# s -- ^ state token + -> (# State# s, Int# #) -- returns next destination offset +{-# noinline pasteUtf8TextJson# #-} pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = let ST f = do let dst = MutableByteArray dst# let doff0 = I# doff0# PM.writeByteArray dst doff0 (c2w '"') - let go !soff !slen !doff = - if slen > 0 - then case indexChar8Array (ByteArray src#) soff of - '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) - '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) - c -> - if c >= '\x20' - then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) - else case c of - '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) - '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) - '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) - '\b' -> write2 dst doff '\\' 'b' *> go (soff + 1) (slen - 1) (doff + 2) - '\f' -> write2 dst doff '\\' 'f' *> go (soff + 1) (slen - 1) (doff + 2) - _ -> do - write2 dst doff '\\' 'u' - doff' <- - UnsafeBounded.pasteST - (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) - dst - (doff + 2) - go (soff + 1) (slen - 1) doff' - else pure doff + let go !soff !slen !doff = if slen > 0 + then case indexChar8Array (ByteArray src#) soff of + '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) + '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) + c -> if c >= '\x20' + then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) + else case c of + '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) + '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) + '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) + _ -> do + write2 dst doff '\\' 'u' + doff' <- UnsafeBounded.pasteST + (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) + dst (doff + 2) + go (soff + 1) (slen - 1) doff' + else pure doff doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1) PM.writeByteArray dst doffRes (c2w '"') pure (doffRes + 1) @@ -421,7 +362,7 @@ pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = in (# s1, dstFinal #) c2w :: Char -> Word8 -{-# INLINE c2w #-} +{-# inline c2w #-} c2w = fromIntegral . ord -- Internal. Write two characters in the ASCII plane to a byte array. @@ -431,5 +372,5 @@ write2 marr ix a b = do PM.writeByteArray marr (ix + 1) (c2w b) indexChar8Array :: ByteArray -> Int -> Char -{-# INLINE indexChar8Array #-} +{-# inline indexChar8Array #-} indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i) diff --git a/test/Main.hs b/test/Main.hs index af18163..1b206c7 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,58 +1,53 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE NumericUnderscores #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} +{-# language BangPatterns #-} +{-# language NumericUnderscores #-} +{-# language OverloadedStrings #-} +{-# language QuasiQuotes #-} +{-# language ScopedTypeVariables #-} +{-# language TypeApplications #-} + {-# OPTIONS_GHC -fno-warn-orphans #-} import Prelude hiding (replicate) --- liftA2 is needed by GHC 9.4 import Control.Applicative (liftA2) import Control.Monad.ST (runST) import Data.Bytes.Builder -import Data.Bytes.Types (MutableBytes (MutableBytes)) -import Data.Char (chr, ord) -import Data.IORef (IORef, newIORef, readIORef, writeIORef) -import Data.Primitive (ByteArray, PrimArray) -import Data.WideWord (Word128 (Word128), Word256 (Word256)) +import Data.Bytes.Builder.Template (bldr) +import Data.Bytes.Types (MutableBytes(MutableBytes)) +import Data.Char (ord,chr) +import Data.IORef (IORef,newIORef,readIORef,writeIORef) +import Data.Maybe (fromMaybe) +import Data.Primitive (ByteArray) +import Data.Primitive (PrimArray) +import Data.Text.Short (ShortText) +import Data.WideWord (Word128(Word128),Word256(Word256)) import Data.Word import Numeric.Natural (Natural) -import Test.QuickCheck (Arbitrary, (===)) +import Test.QuickCheck ((===),Arbitrary) import Test.QuickCheck.Instances.Natural () -import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty (defaultMain,testGroup,TestTree) import Test.Tasty.HUnit ((@=?)) import Text.Printf (printf) -#ifdef QUOTER -import Data.Bytes.Builder.Template (bldr) -import Data.Maybe (fromMaybe) -import Data.Text.Short (ShortText) -#endif - import qualified Arithmetic.Nat as Nat import qualified Data.Bits as Bits +import qualified Data.Bytes as Bytes +import qualified Data.Bytes.Builder as Builder +import qualified Data.Bytes.Builder.Bounded as Bounded +import qualified Data.Bytes.Chunks as Chunks +import qualified Data.Bytes.Text.Ascii as Ascii +import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.ByteString as ByteString import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Lazy.Char8 as LB -import qualified Data.Bytes as Bytes -import qualified Data.Bytes.Builder.Bounded as Bounded -import qualified Data.Bytes.Chunks as Chunks -import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.List as L import qualified Data.Primitive as PM import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified GHC.Exts as Exts +import qualified Prelude import qualified Test.Tasty.HUnit as THU import qualified Test.Tasty.QuickCheck as TQC -import qualified Prelude - -#ifdef QUOTER -import qualified Data.Bytes.Builder as Builder -import qualified Data.Bytes.Text.Ascii as Ascii -#endif import qualified HexWord64 import qualified Word16Tree @@ -61,311 +56,299 @@ main :: IO () main = defaultMain tests tests :: TestTree -tests = - testGroup - "Tests" - [ testGroup - "live" - [ TQC.testProperty "word64Dec" $ \w -> - runConcat 1 (word64Dec w) === pack (show w) - , TQC.testProperty "word64Dec-x3" $ \x y z -> - runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) - === pack (show x ++ show y ++ show z) - , TQC.testProperty "int64Dec-x3" $ \x y z -> - runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) - === pack (show x ++ show y ++ show z) - , TQC.testProperty "word64BE-x3" $ \x y z -> - runConcat 1 (word64BE x <> word64BE y <> word64BE z) - === pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) - , TQC.testProperty "word256PaddedLowerHex" $ \w -> - Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) - === pack (showWord256PaddedLowerHex w) - , TQC.testProperty "word128PaddedUpperHex" $ \w -> - Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) - === pack (showWord128PaddedUpperHex w) - , TQC.testProperty "word64PaddedUpperHex" $ \w -> - runConcat 1 (word64PaddedUpperHex w) - === pack (showWord64PaddedUpperHex w) - , TQC.testProperty "word16PaddedLowerHex" $ \w -> - runConcat 1 (word16PaddedLowerHex w) - === pack (showWord16PaddedLowerHex w) - , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0, 99)) $ \w -> - Bounded.run Nat.two (Bounded.wordPaddedDec2 w) - === pack (zeroPadL 2 (show w)) - , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0, 9999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) - === pack (zeroPadL 4 (show w)) - , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0, 999999999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) - === pack (zeroPadL 9 (show w)) - , TQC.testProperty "word8Dec" $ \w -> - runConcat 1 (word8Dec w) - === pack (show w) - , TQC.testProperty "consLength32BE" $ \w -> - runConcat 1 (consLength32BE (word8Dec w)) - === pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) - , TQC.testProperty "consLength64BE-uni" $ \w -> - pack - ( '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : chr (L.length (show w)) - : show w +tests = testGroup "Tests" + [ testGroup "live" + [ TQC.testProperty "word64Dec" $ \w -> + runConcat 1 (word64Dec w) === pack (show w) + , TQC.testProperty "word64Dec-x3" $ \x y z -> + runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) + === + pack (show x ++ show y ++ show z) + , TQC.testProperty "int64Dec-x3" $ \x y z -> + runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) + === + pack (show x ++ show y ++ show z) + , TQC.testProperty "word64BE-x3" $ \x y z -> + runConcat 1 (word64BE x <> word64BE y <> word64BE z) + === + pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) + , TQC.testProperty "word256PaddedLowerHex" $ \w -> + Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) + === + pack (showWord256PaddedLowerHex w) + , TQC.testProperty "word128PaddedUpperHex" $ \w -> + Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) + === + pack (showWord128PaddedUpperHex w) + , TQC.testProperty "word64PaddedUpperHex" $ \w -> + runConcat 1 (word64PaddedUpperHex w) + === + pack (showWord64PaddedUpperHex w) + , TQC.testProperty "word16PaddedLowerHex" $ \w -> + runConcat 1 (word16PaddedLowerHex w) + === + pack (showWord16PaddedLowerHex w) + , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0,99)) $ \w -> + Bounded.run Nat.two (Bounded.wordPaddedDec2 w) + === + pack (zeroPadL 2 (show w)) + , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0,9999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) + === + pack (zeroPadL 4 (show w)) + , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0,999999999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) + === + pack (zeroPadL 9 (show w)) + , TQC.testProperty "word8Dec" $ \w -> + runConcat 1 (word8Dec w) + === + pack (show w) + , TQC.testProperty "consLength32BE" $ \w -> + runConcat 1 (consLength32BE (word8Dec w)) + === + pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) + , TQC.testProperty "consLength64BE-uni" $ \w -> + pack + ( '\x00' : '\x00' : '\x00' : '\x00' + : '\x00' : '\x00' : '\x00' : chr (L.length (show w)) + : show w + ) + === + runConcat 1 (consLength64BE (word16Dec w)) + , TQC.testProperty "consLength64BE-multi" $ \w -> + pack + ( '\x00' : '\x00' : '\x00' : '\x00' + : '\x00' : '\x00' : '\x00' : chr (1 + L.length (show w)) + : '\x42' : show w + ) + === + runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) + , THU.testCase "stringUtf8" $ + packUtf8 "¿Cómo estás? I am doing well." @=? + runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") + , THU.testCase "doubleDec-A" $ + pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) + , THU.testCase "doubleDec-B" $ + pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) + , THU.testCase "doubleDec-C" $ + pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) + , THU.testCase "doubleDec-D" $ + pack ("-42") @=? runConcat 1 (doubleDec (-42)) + , THU.testCase "doubleDec-E" $ + AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) + , THU.testCase "doubleDec-F" $ + pack ("42") @=? runConcat 1 (doubleDec 42) + , THU.testCase "doubleDec-G" $ + pack ("0") @=? runConcat 1 (doubleDec 0) + , THU.testCase "doubleDec-H" $ + pack ("0.5") @=? runConcat 1 (doubleDec 0.5) + , THU.testCase "doubleDec-I" $ + pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) + , THU.testCase "doubleDec-J" $ + pack ("999999999") @=? runConcat 1 (doubleDec 999999999) + , THU.testCase "doubleDec-K" $ + pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) + , THU.testCase "doubleDec-L" $ + AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) + , THU.testCase "doubleDec-M" $ + AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) + , THU.testCase "doubleDec-N" $ + AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) + , THU.testCase "doubleDec-O" $ + AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) + , THU.testCase "doubleDec-P" $ + AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) + , THU.testCase "doubleDec-Q" $ + AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) + , THU.testCase "shortTextJsonString-A" $ + pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") + , THU.testCase "shortTextJsonString-B" $ + pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") + , THU.testCase "shortTextJsonString-C" $ + pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") + , THU.testCase "shortTextJsonString-D" $ + pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") + , THU.testCase "word-16-tree" $ + Word16Tree.expectedSmall @=? runConcat 1 + (Word16Tree.encode Word16Tree.exampleSmall) + , THU.testCase "byteArray-small" $ + let a = replicateByte 3 0x50 + b = replicateByte 5 0x51 + in mconcat [a,b] @=? runConcat 1 + ( byteArray a <> byteArray b ) + , THU.testCase "byteArray-big" $ + let a = replicateByte 2105 0x50 + b = replicateByte 725 0x51 + c = replicateByte 900 0x52 + d = replicateByte 800 0x53 + e = replicateByte 700 0x54 + f = replicateByte 950 0x55 + g = replicateByte 975 0x56 + h = replicateByte 3000 0x57 + i = replicateByte 125 0x58 + in mconcat [a,b,c,d,e,f,g,h,i] @=? runConcat 1 + ( byteArray a <> byteArray b <> byteArray c <> + byteArray d <> byteArray e <> byteArray f <> + byteArray g <> byteArray h <> byteArray i ) - === runConcat 1 (consLength64BE (word16Dec w)) - , TQC.testProperty "consLength64BE-multi" $ \w -> - pack - ( '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : '\x00' - : chr (1 + L.length (show w)) - : '\x42' - : show w - ) - === runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) - , THU.testCase "stringUtf8" $ - packUtf8 "¿Cómo estás? I am doing well." - @=? runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") - , THU.testCase "doubleDec-A" $ - pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) - , THU.testCase "doubleDec-B" $ - pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) - , THU.testCase "doubleDec-C" $ - pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) - , THU.testCase "doubleDec-D" $ - pack ("-42") @=? runConcat 1 (doubleDec (-42)) - , THU.testCase "doubleDec-E" $ - AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) - , THU.testCase "doubleDec-F" $ - pack ("42") @=? runConcat 1 (doubleDec 42) - , THU.testCase "doubleDec-G" $ - pack ("0") @=? runConcat 1 (doubleDec 0) - , THU.testCase "doubleDec-H" $ - pack ("0.5") @=? runConcat 1 (doubleDec 0.5) - , THU.testCase "doubleDec-I" $ - pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) - , THU.testCase "doubleDec-J" $ - pack ("999999999") @=? runConcat 1 (doubleDec 999999999) - , THU.testCase "doubleDec-K" $ - pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) - , THU.testCase "doubleDec-L" $ - AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) - , THU.testCase "doubleDec-M" $ - AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) - , THU.testCase "doubleDec-N" $ - AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) - , THU.testCase "doubleDec-O" $ - AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) - , THU.testCase "doubleDec-P" $ - AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) - , THU.testCase "doubleDec-Q" $ - AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) - , THU.testCase "shortTextJsonString-A" $ - pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") - , THU.testCase "shortTextJsonString-B" $ - pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") - , THU.testCase "shortTextJsonString-C" $ - pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") - , THU.testCase "shortTextJsonString-D" $ - pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") - , THU.testCase "word-16-tree" $ - Word16Tree.expectedSmall - @=? runConcat - 1 - (Word16Tree.encode Word16Tree.exampleSmall) - , THU.testCase "byteArray-small" $ - let a = replicateByte 3 0x50 - b = replicateByte 5 0x51 - in mconcat [a, b] - @=? runConcat - 1 - (byteArray a <> byteArray b) - , THU.testCase "byteArray-big" $ - let a = replicateByte 2105 0x50 - b = replicateByte 725 0x51 - c = replicateByte 900 0x52 - d = replicateByte 800 0x53 - e = replicateByte 700 0x54 - f = replicateByte 950 0x55 - g = replicateByte 975 0x56 - h = replicateByte 3000 0x57 - i = replicateByte 125 0x58 - in mconcat [a, b, c, d, e, f, g, h, i] - @=? runConcat - 1 - ( byteArray a - <> byteArray b - <> byteArray c - <> byteArray d - <> byteArray e - <> byteArray f - <> byteArray g - <> byteArray h - <> byteArray i - ) - , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16LE xs) - === runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16BE xs) - === runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32LE xs) - === runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32BE xs) - === runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64LE xs) - === runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64BE xs) - === runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128LE xs) - === runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128BE xs) - === runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256LE xs) - === runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256BE xs) - === runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> - runConcat 1 (word64Vlq x) - === naiveVlq (fromIntegral x) - , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> - runConcat 1 (word64LEB128 x) - === naiveLeb128 (fromIntegral x) - , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> - runConcat 1 (naturalDec x) - === pack (show x) - , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> - let y = 1234567892345678934678987654321 * x - in runConcat 1 (naturalDec y) - === pack (show y) - , testGroup - "leb128-encoding" - [ THU.testCase "16" $ - Chunks.concat (run 16 (word64LEB128 16)) - @=? Latin1.fromString "\x10" - , THU.testCase "1000000" $ - Chunks.concat (run 16 (word64LEB128 1000000)) - @=? Exts.fromList [0xc0, 0x84, 0x3d] - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] - , testGroup - "seven/eight encoding" - [ THU.testCase "deadbeef" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightRight inp) - @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] - ] - , testGroup - "alternate" - [ TQC.testProperty "HexWord64" $ \x y -> - runConcat - 1 - ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) - <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) - ) - === pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) - ] - , testGroup - "putMany" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putMany 7 ascii txt (bytesOntoRef ref) - res <- readIORef ref - id $ - [ map c2w "hello_" - , map c2w "world_" - , map c2w "are_yo" - , map c2w "u_list" - , map c2w "ening" - ] - @=? map Exts.toList (Exts.toList res) - ] - , testGroup - "putManyConsLength" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putManyConsLength - Nat.constant - (\n -> Bounded.word16BE (fromIntegral n)) - 16 - ascii - txt - (bytesOntoRef ref) - res <- readIORef ref - id $ - [ 0x00 : 0x0A : map c2w "hello_worl" - , 0x00 : 0x0A : map c2w "d_are_you_" - , 0x00 : 0x09 : map c2w "listening" - ] - @=? map Exts.toList (Exts.toList res) - ] -#ifdef QUOTER - , testGroup - "bytes templates" - [ THU.testCase "A" $ do - let name = Just ("foo" :: ShortText) - msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "Hello foo!\n" @=? msg - , THU.testCase "B" $ do - let one = "foo" :: ShortText - two = "bar" :: String - msgBuilder = [bldr|`one``two`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "foobar" @=? msg - , THU.testCase "C" $ do - let msgBuilder = [bldr|a backtick for you: \`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "a backtick for you: `" @=? msg - , THU.testCase "D" $ do - let i = 137 :: Int - msgBuilder = [bldr|there are `i` lights!|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "there are 137 lights!" @=? msg - ] -#endif + , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16LE xs) + === + runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16BE xs) + === + runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32LE xs) + === + runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32BE xs) + === + runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64LE xs) + === + runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64BE xs) + === + runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128LE xs) + === + runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128BE xs) + === + runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256LE xs) + === + runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256BE xs) + === + runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> + runConcat 1 (word64Vlq x) + === + naiveVlq (fromIntegral x) + , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> + runConcat 1 (word64LEB128 x) + === + naiveLeb128 (fromIntegral x) + , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> + runConcat 1 (naturalDec x) + === + pack (show x) + , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> + let y = 1234567892345678934678987654321 * x in + runConcat 1 (naturalDec y) + === + pack (show y) + , testGroup "leb128-encoding" + [ THU.testCase "16" $ + Chunks.concat (run 16 (word64LEB128 16)) + @=? + Latin1.fromString "\x10" + , THU.testCase "1000000" $ + Chunks.concat (run 16 (word64LEB128 1000000)) + @=? + Exts.fromList [0xc0,0x84,0x3d] + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] + , testGroup "seven/eight encoding" + [ THU.testCase "deadbeef" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightRight inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] ] + , testGroup "alternate" + [ TQC.testProperty "HexWord64" $ \x y -> + runConcat 1 + ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) + <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) + ) + === + pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) + ] + , testGroup "putMany" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putMany 7 ascii txt (bytesOntoRef ref) + res <- readIORef ref + id $ + [ map c2w "hello_" + , map c2w "world_" + , map c2w "are_yo" + , map c2w "u_list" + , map c2w "ening" + ] @=? map Exts.toList (Exts.toList res) + ] + , testGroup "putManyConsLength" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putManyConsLength Nat.constant + (\n -> Bounded.word16BE (fromIntegral n)) + 16 ascii txt (bytesOntoRef ref) + res <- readIORef ref + id $ + [ 0x00 : 0x0A : map c2w "hello_worl" + , 0x00 : 0x0A : map c2w "d_are_you_" + , 0x00 : 0x09 : map c2w "listening" + ] @=? map Exts.toList (Exts.toList res) + ] + , testGroup "bytes templates" + [ THU.testCase "A" $ do + let name = Just ("foo" :: ShortText) + msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "Hello foo!\n" @=? msg + , THU.testCase "B" $ do + let one = "foo" :: ShortText + two = "bar" :: String + msgBuilder = [bldr|`one``two`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "foobar" @=? msg + , THU.testCase "C" $ do + let msgBuilder = [bldr|a backtick for you: \`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "a backtick for you: `" @=? msg + , THU.testCase "D" $ do + let i = 137 :: Int + msgBuilder = [bldr|there are `i` lights!|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "there are 137 lights!" @=? msg + ] + ] bytesOntoRef :: - IORef [PM.ByteArray] -> - MutableBytes Exts.RealWorld -> - IO () + IORef [PM.ByteArray] + -> MutableBytes Exts.RealWorld + -> IO () bytesOntoRef !ref (MutableBytes buf off len) = do rs <- readIORef ref dst <- PM.newByteArray len @@ -408,10 +391,9 @@ newtype AsciiByteArray = AsciiByteArray ByteArray deriving (Eq) instance Show AsciiByteArray where - show (AsciiByteArray b) = - if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) - then Latin1.toString (Bytes.fromByteArray b) - else show (show b) + show (AsciiByteArray b) = if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) + then Latin1.toString (Bytes.fromByteArray b) + else show (show b) instance Arbitrary Word128 where arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary @@ -427,30 +409,29 @@ zeroPadL n s naiveLeb128 :: Natural -> ByteArray naiveLeb128 x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q, r) = quotRem n 128 + let (q,r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r - w = - if q == 0 - then r' - else Bits.setBit r' 7 + w = if q == 0 + then r' + else Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then L.reverse xs' else go xs' q naiveVlq :: Natural -> ByteArray naiveVlq x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q, r) = quotRem n 128 + let (q,r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r w = case xs of [] -> r' _ -> Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then xs' else go xs' q