Add a test for a very simple CSV encoding
This commit is contained in:
parent
459c48a2d9
commit
cd3631e5fd
6 changed files with 82 additions and 3 deletions
32
bench/Cell.hs
Normal file
32
bench/Cell.hs
Normal file
|
@ -0,0 +1,32 @@
|
|||
{-# language OverloadedLists #-}
|
||||
{-# language OverloadedStrings #-}
|
||||
|
||||
module Cell
|
||||
( Cell(..)
|
||||
, cells
|
||||
) where
|
||||
|
||||
import Data.Word (Word32)
|
||||
import Data.Text.Short (ShortText)
|
||||
import Data.Primitive (SmallArray)
|
||||
|
||||
-- A cell in a CSV file
|
||||
data Cell
|
||||
= CellString !ShortText
|
||||
| CellNumber !Word32
|
||||
|
||||
-- Some sample data to encode as a CSV
|
||||
cells :: SmallArray (SmallArray Cell)
|
||||
cells =
|
||||
[ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ]
|
||||
, [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ]
|
||||
, [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ]
|
||||
, [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ]
|
||||
, [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ]
|
||||
, [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ]
|
||||
, [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ]
|
||||
, [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ]
|
||||
, [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ]
|
||||
, [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ]
|
||||
]
|
||||
|
|
@ -1,11 +1,17 @@
|
|||
{-# language LambdaCase #-}
|
||||
{-# language OverloadedStrings #-}
|
||||
|
||||
import Data.Primitive (ByteArray)
|
||||
import Data.Word (Word64)
|
||||
import Gauge (bgroup,bench,whnf)
|
||||
import Gauge.Main (defaultMain)
|
||||
|
||||
import qualified Arithmetic.Nat as Nat
|
||||
import qualified Data.ByteArray.Builder as B
|
||||
import qualified Data.ByteArray.Builder.Bounded as U
|
||||
|
||||
import qualified Cell
|
||||
import qualified SimpleCsv
|
||||
import qualified HexWord64
|
||||
|
||||
main :: IO ()
|
||||
|
@ -16,6 +22,11 @@ main = defaultMain
|
|||
, bench "loop" (whnf encodeHexWord64sLoop w64s)
|
||||
]
|
||||
]
|
||||
, bgroup "unbounded"
|
||||
[ bench "csv-no-escape" $ whnf
|
||||
(\x -> B.run 4080 (SimpleCsv.encodeRows x))
|
||||
Cell.cells
|
||||
]
|
||||
]
|
||||
|
||||
w64s :: Word64s
|
||||
|
|
31
bench/SimpleCsv.hs
Normal file
31
bench/SimpleCsv.hs
Normal file
|
@ -0,0 +1,31 @@
|
|||
{-# language LambdaCase #-}
|
||||
|
||||
-- A variant of CSV encoding that does not perform
|
||||
-- any escaping or quoting. This is in its own module
|
||||
-- to make it easy to analyze the GHC Core that it
|
||||
-- gets compiled to.
|
||||
module SimpleCsv
|
||||
( encodeRows
|
||||
) where
|
||||
|
||||
import Cell (Cell(..))
|
||||
import Data.Primitive (SmallArray)
|
||||
|
||||
import qualified Data.Foldable as F
|
||||
import qualified Data.ByteArray.Builder as B
|
||||
|
||||
encodeRows :: SmallArray (SmallArray Cell) -> B.Builder
|
||||
encodeRows = F.foldr
|
||||
(\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x))
|
||||
mempty
|
||||
|
||||
encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder
|
||||
encodeSimpleCsvRow cs b = F.foldr
|
||||
(\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x)
|
||||
b
|
||||
cs
|
||||
|
||||
encodeSimpleCsvCell :: Cell -> B.Builder
|
||||
encodeSimpleCsvCell = \case
|
||||
CellNumber n -> B.word32Dec n
|
||||
CellString t -> B.shortTextUtf8 t
|
|
@ -87,9 +87,12 @@ benchmark bench
|
|||
, natural-arithmetic
|
||||
, primitive
|
||||
, small-bytearray-builder
|
||||
, text-short
|
||||
ghc-options: -Wall -O2
|
||||
default-language: Haskell2010
|
||||
hs-source-dirs: bench, common
|
||||
main-is: Main.hs
|
||||
other-modules:
|
||||
Cell
|
||||
HexWord64
|
||||
SimpleCsv
|
||||
|
|
|
@ -403,7 +403,7 @@ word8PaddedUpperHex w =
|
|||
-- | Encode an ASCII char.
|
||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||
ascii :: Char -> Builder
|
||||
ascii c = fromBounded Nat.constant (Bounded.char c)
|
||||
ascii c = fromBounded Nat.constant (Bounded.ascii c)
|
||||
|
||||
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||
char :: Char -> Builder
|
||||
|
|
|
@ -507,10 +507,12 @@ word8LowerHex# w#
|
|||
where
|
||||
w = W# w#
|
||||
|
||||
-- | Encode an ASCII char.
|
||||
-- | Encode an ASCII character.
|
||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||
ascii :: Char -> Builder 1
|
||||
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
|
||||
ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do
|
||||
primitive_ (writeCharArray# arr off c)
|
||||
pure (I# (off +# 1# ))
|
||||
|
||||
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
||||
char :: Char -> Builder 4
|
||||
|
|
Loading…
Reference in a new issue