Add a test for a very simple CSV encoding

This commit is contained in:
Andrew Martin 2019-09-20 09:23:08 -04:00
parent 459c48a2d9
commit cd3631e5fd
6 changed files with 82 additions and 3 deletions

32
bench/Cell.hs Normal file
View file

@ -0,0 +1,32 @@
{-# language OverloadedLists #-}
{-# language OverloadedStrings #-}
module Cell
( Cell(..)
, cells
) where
import Data.Word (Word32)
import Data.Text.Short (ShortText)
import Data.Primitive (SmallArray)
-- A cell in a CSV file
data Cell
= CellString !ShortText
| CellNumber !Word32
-- Some sample data to encode as a CSV
cells :: SmallArray (SmallArray Cell)
cells =
[ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ]
, [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ]
, [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ]
, [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ]
, [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ]
, [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ]
, [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ]
, [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ]
, [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ]
, [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ]
]

View file

@ -1,11 +1,17 @@
{-# language LambdaCase #-}
{-# language OverloadedStrings #-}
import Data.Primitive (ByteArray)
import Data.Word (Word64)
import Gauge (bgroup,bench,whnf)
import Gauge.Main (defaultMain)
import qualified Arithmetic.Nat as Nat
import qualified Data.ByteArray.Builder as B
import qualified Data.ByteArray.Builder.Bounded as U
import qualified Cell
import qualified SimpleCsv
import qualified HexWord64
main :: IO ()
@ -16,6 +22,11 @@ main = defaultMain
, bench "loop" (whnf encodeHexWord64sLoop w64s)
]
]
, bgroup "unbounded"
[ bench "csv-no-escape" $ whnf
(\x -> B.run 4080 (SimpleCsv.encodeRows x))
Cell.cells
]
]
w64s :: Word64s

31
bench/SimpleCsv.hs Normal file
View file

@ -0,0 +1,31 @@
{-# language LambdaCase #-}
-- A variant of CSV encoding that does not perform
-- any escaping or quoting. This is in its own module
-- to make it easy to analyze the GHC Core that it
-- gets compiled to.
module SimpleCsv
( encodeRows
) where
import Cell (Cell(..))
import Data.Primitive (SmallArray)
import qualified Data.Foldable as F
import qualified Data.ByteArray.Builder as B
encodeRows :: SmallArray (SmallArray Cell) -> B.Builder
encodeRows = F.foldr
(\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x))
mempty
encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder
encodeSimpleCsvRow cs b = F.foldr
(\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x)
b
cs
encodeSimpleCsvCell :: Cell -> B.Builder
encodeSimpleCsvCell = \case
CellNumber n -> B.word32Dec n
CellString t -> B.shortTextUtf8 t

View file

@ -87,9 +87,12 @@ benchmark bench
, natural-arithmetic
, primitive
, small-bytearray-builder
, text-short
ghc-options: -Wall -O2
default-language: Haskell2010
hs-source-dirs: bench, common
main-is: Main.hs
other-modules:
Cell
HexWord64
SimpleCsv

View file

@ -403,7 +403,7 @@ word8PaddedUpperHex w =
-- | Encode an ASCII char.
-- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder
ascii c = fromBounded Nat.constant (Bounded.char c)
ascii c = fromBounded Nat.constant (Bounded.ascii c)
-- | Encode an UTF8 char. This only uses as much space as is required.
char :: Char -> Builder

View file

@ -507,10 +507,12 @@ word8LowerHex# w#
where
w = W# w#
-- | Encode an ASCII char.
-- | Encode an ASCII character.
-- Precondition: Input must be an ASCII character. This is not checked.
ascii :: Char -> Builder 1
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do
primitive_ (writeCharArray# arr off c)
pure (I# (off +# 1# ))
-- | Encode a character as UTF-8. This only uses as much space as is required.
char :: Char -> Builder 4