From cd3631e5fd88ab168b820f728898ddd1449193b5 Mon Sep 17 00:00:00 2001
From: Andrew Martin
Date: Fri, 20 Sep 2019 09:23:08 -0400
Subject: [PATCH] Add a test for a very simple CSV encoding

---
Reviewer notes (free-form text in this position is not part of the commit):

* Although the subject says "test", every new module lives under bench/ and
  is registered in the `benchmark bench` stanza. The change adds a Gauge
  benchmark group "unbounded" containing a single benchmark, "csv-no-escape",
  which evaluates `B.run 4080 (SimpleCsv.encodeRows x)` over `Cell.cells`.
* `encodeSimpleCsvRow` emits a ',' after every cell, including the last one
  in a row, and `encodeRows` then emits '\n'. Each encoded row therefore ends
  with ",\n". No quoting or escaping is performed, as the module comment says.
* The sample data in bench/Cell.hs contains non-ASCII names; string cells are
  encoded with `B.shortTextUtf8`, number cells with `B.word32Dec`.
* `Data.ByteArray.Builder.ascii` now delegates to `Bounded.ascii` instead of
  `Bounded.char`.
* `Bounded.ascii` no longer goes through `word8`; it writes the character
  with `writeCharArray#` and returns the offset advanced by one. The
  documented precondition (input must be ASCII, unchecked) is unchanged.
* NOTE(review): indentation and blank lines in this copy were reconstructed
  from the hunk line counts; confirm against the repository before applying.

 bench/Cell.hs                         | 32 +++++++++++++++++++++++++++
 bench/Main.hs                         | 11 +++++++++
 bench/SimpleCsv.hs                    | 31 ++++++++++++++++++++++++++
 small-bytearray-builder.cabal         |  3 +++
 src/Data/ByteArray/Builder.hs         |  2 +-
 src/Data/ByteArray/Builder/Bounded.hs |  6 +++--
 6 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 bench/Cell.hs
 create mode 100644 bench/SimpleCsv.hs

diff --git a/bench/Cell.hs b/bench/Cell.hs
new file mode 100644
index 0000000..77d4f61
--- /dev/null
+++ b/bench/Cell.hs
@@ -0,0 +1,32 @@
+{-# language OverloadedLists #-}
+{-# language OverloadedStrings #-}
+
+module Cell
+  ( Cell(..)
+  , cells
+  ) where
+
+import Data.Word (Word32)
+import Data.Text.Short (ShortText)
+import Data.Primitive (SmallArray)
+
+-- A cell in a CSV file
+data Cell
+  = CellString !ShortText
+  | CellNumber !Word32
+
+-- Some sample data to encode as a CSV
+cells :: SmallArray (SmallArray Cell)
+cells =
+  [ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ]
+  , [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ]
+  , [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ]
+  , [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ]
+  , [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ]
+  , [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ]
+  , [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ]
+  , [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ]
+  , [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ]
+  , [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ]
+  ]
+
diff --git a/bench/Main.hs b/bench/Main.hs
index 65872c4..c4fefe4 100644
--- a/bench/Main.hs
+++ b/bench/Main.hs
@@ -1,11 +1,17 @@
+{-# language LambdaCase #-}
+{-# language OverloadedStrings #-}
+
 import Data.Primitive (ByteArray)
 import Data.Word (Word64)
 import Gauge (bgroup,bench,whnf)
 import Gauge.Main (defaultMain)
 
 import qualified Arithmetic.Nat as Nat
+import qualified Data.ByteArray.Builder as B
 import qualified Data.ByteArray.Builder.Bounded as U
 
+import qualified Cell
+import qualified SimpleCsv
 import qualified HexWord64
 
 main :: IO ()
@@ -16,6 +22,11 @@ main = defaultMain
       , bench "loop" (whnf encodeHexWord64sLoop w64s)
       ]
     ]
+  , bgroup "unbounded"
+    [ bench "csv-no-escape" $ whnf
+        (\x -> B.run 4080 (SimpleCsv.encodeRows x))
+        Cell.cells
+    ]
   ]
 
 w64s :: Word64s
diff --git a/bench/SimpleCsv.hs b/bench/SimpleCsv.hs
new file mode 100644
index 0000000..bf9eea0
--- /dev/null
+++ b/bench/SimpleCsv.hs
@@ -0,0 +1,31 @@
+{-# language LambdaCase #-}
+
+-- A variant of CSV encoding that does not perform
+-- any escaping or quoting. This is in its own module
+-- to make it easy to analyze the GHC Core that it
+-- gets compiled to.
+module SimpleCsv
+  ( encodeRows
+  ) where
+
+import Cell (Cell(..))
+import Data.Primitive (SmallArray)
+
+import qualified Data.Foldable as F
+import qualified Data.ByteArray.Builder as B
+
+encodeRows :: SmallArray (SmallArray Cell) -> B.Builder
+encodeRows = F.foldr
+  (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x))
+  mempty
+
+encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder
+encodeSimpleCsvRow cs b = F.foldr
+  (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x)
+  b
+  cs
+
+encodeSimpleCsvCell :: Cell -> B.Builder
+encodeSimpleCsvCell = \case
+  CellNumber n -> B.word32Dec n
+  CellString t -> B.shortTextUtf8 t
diff --git a/small-bytearray-builder.cabal b/small-bytearray-builder.cabal
index 6460446..6025a5c 100644
--- a/small-bytearray-builder.cabal
+++ b/small-bytearray-builder.cabal
@@ -87,9 +87,12 @@ benchmark bench
     , natural-arithmetic
     , primitive
     , small-bytearray-builder
+    , text-short
   ghc-options: -Wall -O2
   default-language: Haskell2010
   hs-source-dirs: bench, common
   main-is: Main.hs
   other-modules:
+    Cell
     HexWord64
+    SimpleCsv
diff --git a/src/Data/ByteArray/Builder.hs b/src/Data/ByteArray/Builder.hs
index 4b36b17..146926a 100644
--- a/src/Data/ByteArray/Builder.hs
+++ b/src/Data/ByteArray/Builder.hs
@@ -403,7 +403,7 @@ word8PaddedUpperHex w =
 -- | Encode an ASCII char.
 -- Precondition: Input must be an ASCII character. This is not checked.
 ascii :: Char -> Builder
-ascii c = fromBounded Nat.constant (Bounded.char c)
+ascii c = fromBounded Nat.constant (Bounded.ascii c)
 
 -- | Encode an UTF8 char. This only uses as much space as is required.
 char :: Char -> Builder
diff --git a/src/Data/ByteArray/Builder/Bounded.hs b/src/Data/ByteArray/Builder/Bounded.hs
index 61ce345..5d41e47 100644
--- a/src/Data/ByteArray/Builder/Bounded.hs
+++ b/src/Data/ByteArray/Builder/Bounded.hs
@@ -507,10 +507,12 @@ word8LowerHex# w#
   where
   w = W# w#
 
--- | Encode an ASCII char.
+-- | Encode an ASCII character.
 -- Precondition: Input must be an ASCII character. This is not checked.
 ascii :: Char -> Builder 1
-ascii c = word8 (fromIntegral @Int @Word8 (ord c))
+ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do
+  primitive_ (writeCharArray# arr off c)
+  pure (I# (off +# 1# ))
 
 -- | Encode a character as UTF-8. This only uses as much space as is required.
 char :: Char -> Builder 4