Add a test for a very simple CSV encoding

This commit is contained in:
Andrew Martin 2019-09-20 09:23:08 -04:00
parent 459c48a2d9
commit cd3631e5fd
6 changed files with 82 additions and 3 deletions

32
bench/Cell.hs Normal file
View file

@ -0,0 +1,32 @@
{-# language OverloadedLists #-}
{-# language OverloadedStrings #-}
module Cell
( Cell(..)
, cells
) where
import Data.Word (Word32)
import Data.Text.Short (ShortText)
import Data.Primitive (SmallArray)
-- A cell in a CSV file
data Cell
= CellString !ShortText
| CellNumber !Word32
-- Some sample data to encode as a CSV
cells :: SmallArray (SmallArray Cell)
cells =
[ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ]
, [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ]
, [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ]
, [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ]
, [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ]
, [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ]
, [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ]
, [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ]
, [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ]
, [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ]
]

View file

@ -1,11 +1,17 @@
{-# language LambdaCase #-}
{-# language OverloadedStrings #-}
import Data.Primitive (ByteArray) import Data.Primitive (ByteArray)
import Data.Word (Word64) import Data.Word (Word64)
import Gauge (bgroup,bench,whnf) import Gauge (bgroup,bench,whnf)
import Gauge.Main (defaultMain) import Gauge.Main (defaultMain)
import qualified Arithmetic.Nat as Nat import qualified Arithmetic.Nat as Nat
import qualified Data.ByteArray.Builder as B
import qualified Data.ByteArray.Builder.Bounded as U import qualified Data.ByteArray.Builder.Bounded as U
import qualified Cell
import qualified SimpleCsv
import qualified HexWord64 import qualified HexWord64
main :: IO () main :: IO ()
@ -16,6 +22,11 @@ main = defaultMain
, bench "loop" (whnf encodeHexWord64sLoop w64s) , bench "loop" (whnf encodeHexWord64sLoop w64s)
] ]
] ]
, bgroup "unbounded"
[ bench "csv-no-escape" $ whnf
(\x -> B.run 4080 (SimpleCsv.encodeRows x))
Cell.cells
]
] ]
w64s :: Word64s w64s :: Word64s

31
bench/SimpleCsv.hs Normal file
View file

@ -0,0 +1,31 @@
{-# language LambdaCase #-}
-- A variant of CSV encoding that does not perform
-- any escaping or quoting. This is in its own module
-- to make it easy to analyze the GHC Core that it
-- gets compiled to.
module SimpleCsv
( encodeRows
) where
import Cell (Cell(..))
import Data.Primitive (SmallArray)
import qualified Data.Foldable as F
import qualified Data.ByteArray.Builder as B
encodeRows :: SmallArray (SmallArray Cell) -> B.Builder
encodeRows = F.foldr
(\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x))
mempty
encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder
encodeSimpleCsvRow cs b = F.foldr
(\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x)
b
cs
encodeSimpleCsvCell :: Cell -> B.Builder
encodeSimpleCsvCell = \case
CellNumber n -> B.word32Dec n
CellString t -> B.shortTextUtf8 t

View file

@ -87,9 +87,12 @@ benchmark bench
, natural-arithmetic , natural-arithmetic
, primitive , primitive
, small-bytearray-builder , small-bytearray-builder
, text-short
ghc-options: -Wall -O2 ghc-options: -Wall -O2
default-language: Haskell2010 default-language: Haskell2010
hs-source-dirs: bench, common hs-source-dirs: bench, common
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Cell
HexWord64 HexWord64
SimpleCsv

View file

@ -403,7 +403,7 @@ word8PaddedUpperHex w =
-- | Encode an ASCII char. -- | Encode an ASCII char.
-- Precondition: Input must be an ASCII character. This is not checked. -- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder ascii :: Char -> Builder
ascii c = fromBounded Nat.constant (Bounded.char c) ascii c = fromBounded Nat.constant (Bounded.ascii c)
-- | Encode an UTF8 char. This only uses as much space as is required. -- | Encode an UTF8 char. This only uses as much space as is required.
char :: Char -> Builder char :: Char -> Builder

View file

@ -507,10 +507,12 @@ word8LowerHex# w#
where where
w = W# w# w = W# w#
-- | Encode an ASCII char. -- | Encode an ASCII character.
-- Precondition: Input must be an ASCII character. This is not checked. -- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder 1 ascii :: Char -> Builder 1
ascii c = word8 (fromIntegral @Int @Word8 (ord c)) ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do
primitive_ (writeCharArray# arr off c)
pure (I# (off +# 1# ))
-- | Encode a character as UTF-8. This only uses as much space as is required. -- | Encode a character as UTF-8. This only uses as much space as is required.
char :: Char -> Builder 4 char :: Char -> Builder 4