Add a test for a very simple CSV encoding
This commit is contained in:
parent
459c48a2d9
commit
cd3631e5fd
6 changed files with 82 additions and 3 deletions
32
bench/Cell.hs
Normal file
32
bench/Cell.hs
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
{-# language OverloadedLists #-}
|
||||||
|
{-# language OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Cell
|
||||||
|
( Cell(..)
|
||||||
|
, cells
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Word (Word32)
|
||||||
|
import Data.Text.Short (ShortText)
|
||||||
|
import Data.Primitive (SmallArray)
|
||||||
|
|
||||||
|
-- A cell in a CSV file
|
||||||
|
data Cell
|
||||||
|
= CellString !ShortText
|
||||||
|
| CellNumber !Word32
|
||||||
|
|
||||||
|
-- Some sample data to encode as a CSV
|
||||||
|
cells :: SmallArray (SmallArray Cell)
|
||||||
|
cells =
|
||||||
|
[ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ]
|
||||||
|
, [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ]
|
||||||
|
, [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ]
|
||||||
|
, [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ]
|
||||||
|
, [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ]
|
||||||
|
, [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ]
|
||||||
|
, [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ]
|
||||||
|
, [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ]
|
||||||
|
, [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ]
|
||||||
|
, [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ]
|
||||||
|
]
|
||||||
|
|
|
@ -1,11 +1,17 @@
|
||||||
|
{-# language LambdaCase #-}
|
||||||
|
{-# language OverloadedStrings #-}
|
||||||
|
|
||||||
import Data.Primitive (ByteArray)
|
import Data.Primitive (ByteArray)
|
||||||
import Data.Word (Word64)
|
import Data.Word (Word64)
|
||||||
import Gauge (bgroup,bench,whnf)
|
import Gauge (bgroup,bench,whnf)
|
||||||
import Gauge.Main (defaultMain)
|
import Gauge.Main (defaultMain)
|
||||||
|
|
||||||
import qualified Arithmetic.Nat as Nat
|
import qualified Arithmetic.Nat as Nat
|
||||||
|
import qualified Data.ByteArray.Builder as B
|
||||||
import qualified Data.ByteArray.Builder.Bounded as U
|
import qualified Data.ByteArray.Builder.Bounded as U
|
||||||
|
|
||||||
|
import qualified Cell
|
||||||
|
import qualified SimpleCsv
|
||||||
import qualified HexWord64
|
import qualified HexWord64
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
|
@ -16,6 +22,11 @@ main = defaultMain
|
||||||
, bench "loop" (whnf encodeHexWord64sLoop w64s)
|
, bench "loop" (whnf encodeHexWord64sLoop w64s)
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
, bgroup "unbounded"
|
||||||
|
[ bench "csv-no-escape" $ whnf
|
||||||
|
(\x -> B.run 4080 (SimpleCsv.encodeRows x))
|
||||||
|
Cell.cells
|
||||||
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
w64s :: Word64s
|
w64s :: Word64s
|
||||||
|
|
31
bench/SimpleCsv.hs
Normal file
31
bench/SimpleCsv.hs
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
{-# language LambdaCase #-}
|
||||||
|
|
||||||
|
-- A variant of CSV encoding that does not perform
|
||||||
|
-- any escaping or quoting. This is in its own module
|
||||||
|
-- to make it easy to analyze the GHC Core that it
|
||||||
|
-- gets compiled to.
|
||||||
|
module SimpleCsv
|
||||||
|
( encodeRows
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Cell (Cell(..))
|
||||||
|
import Data.Primitive (SmallArray)
|
||||||
|
|
||||||
|
import qualified Data.Foldable as F
|
||||||
|
import qualified Data.ByteArray.Builder as B
|
||||||
|
|
||||||
|
encodeRows :: SmallArray (SmallArray Cell) -> B.Builder
|
||||||
|
encodeRows = F.foldr
|
||||||
|
(\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x))
|
||||||
|
mempty
|
||||||
|
|
||||||
|
encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder
|
||||||
|
encodeSimpleCsvRow cs b = F.foldr
|
||||||
|
(\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x)
|
||||||
|
b
|
||||||
|
cs
|
||||||
|
|
||||||
|
encodeSimpleCsvCell :: Cell -> B.Builder
|
||||||
|
encodeSimpleCsvCell = \case
|
||||||
|
CellNumber n -> B.word32Dec n
|
||||||
|
CellString t -> B.shortTextUtf8 t
|
|
@ -87,9 +87,12 @@ benchmark bench
|
||||||
, natural-arithmetic
|
, natural-arithmetic
|
||||||
, primitive
|
, primitive
|
||||||
, small-bytearray-builder
|
, small-bytearray-builder
|
||||||
|
, text-short
|
||||||
ghc-options: -Wall -O2
|
ghc-options: -Wall -O2
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
hs-source-dirs: bench, common
|
hs-source-dirs: bench, common
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
other-modules:
|
other-modules:
|
||||||
|
Cell
|
||||||
HexWord64
|
HexWord64
|
||||||
|
SimpleCsv
|
||||||
|
|
|
@ -403,7 +403,7 @@ word8PaddedUpperHex w =
|
||||||
-- | Encode an ASCII char.
|
-- | Encode an ASCII char.
|
||||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
ascii :: Char -> Builder
|
ascii :: Char -> Builder
|
||||||
ascii c = fromBounded Nat.constant (Bounded.char c)
|
ascii c = fromBounded Nat.constant (Bounded.ascii c)
|
||||||
|
|
||||||
-- | Encode an UTF8 char. This only uses as much space as is required.
|
-- | Encode an UTF8 char. This only uses as much space as is required.
|
||||||
char :: Char -> Builder
|
char :: Char -> Builder
|
||||||
|
|
|
@ -507,10 +507,12 @@ word8LowerHex# w#
|
||||||
where
|
where
|
||||||
w = W# w#
|
w = W# w#
|
||||||
|
|
||||||
-- | Encode an ASCII char.
|
-- | Encode an ASCII character.
|
||||||
-- Precondition: Input must be an ASCII character. This is not checked.
|
-- Precondition: Input must be an ASCII character. This is not checked.
|
||||||
ascii :: Char -> Builder 1
|
ascii :: Char -> Builder 1
|
||||||
ascii c = word8 (fromIntegral @Int @Word8 (ord c))
|
ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do
|
||||||
|
primitive_ (writeCharArray# arr off c)
|
||||||
|
pure (I# (off +# 1# ))
|
||||||
|
|
||||||
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
-- | Encode a character as UTF-8. This only uses as much space as is required.
|
||||||
char :: Char -> Builder 4
|
char :: Char -> Builder 4
|
||||||
|
|
Loading…
Reference in a new issue