From 37aaeac18af162c4ca45a51b0b0325419a95aaa0 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Mon, 5 Aug 2019 12:07:02 -0400 Subject: [PATCH] Add doubleDec for rendering floating-point numbers --- CHANGELOG.md | 2 + small-bytearray-builder.cabal | 1 + src/Data/ByteArray/Builder/Small.hs | 10 ++ src/Data/ByteArray/Builder/Small/Unsafe.hs | 110 +++++++++++++++++++++ test/Main.hs | 20 ++++ 5 files changed, 143 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64df2fa..611ed1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ * Correct a serious error in the implementation of `bytes`. * Make `pasteGrowST` accept an initial offset. * Add a `pasteGrowST` for length-indexed builders. +* Add function for rendering floating-point numbers in a slightly + inaccurate way. ## 0.1.1.0 -- 2019-07-30 diff --git a/small-bytearray-builder.cabal b/small-bytearray-builder.cabal index 311ca91..c0a17c5 100644 --- a/small-bytearray-builder.cabal +++ b/small-bytearray-builder.cabal @@ -66,6 +66,7 @@ test-suite test , small-bytearray-builder , QuickCheck >=2.13.1 && <2.14 , tasty-quickcheck >=0.10.1 && <0.11 + , tasty-hunit >=0.10.0.2 && <0.11 , tasty >=1.2.3 && <1.3 , primitive , vector diff --git a/src/Data/ByteArray/Builder/Small.hs b/src/Data/ByteArray/Builder/Small.hs index 8762826..152e60e 100644 --- a/src/Data/ByteArray/Builder/Small.hs +++ b/src/Data/ByteArray/Builder/Small.hs @@ -34,6 +34,9 @@ module Data.ByteArray.Builder.Small , word64BE , word32BE , word16BE + -- * Encode Floating-Point Types + -- ** Human-Readable + , doubleDec ) where import Control.Monad.Primitive @@ -194,6 +197,13 @@ bytes (Bytes src soff slen) = construct $ \(MutableBytes arr off len) -> if len word64Dec :: Word64 -> Builder word64Dec w = fromUnsafe (Unsafe.word64Dec w) +-- | Encode a double-floating-point number, using decimal notation or +-- scientific notation depending on the magnitude. This has undefined +-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not +-- crash, but the generated numbers will be nonsense. +doubleDec :: Double -> Builder +doubleDec w = fromUnsafe (Unsafe.doubleDec w) + -- | Encodes a signed 64-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. -- Negative numbers are preceded by a minus sign. Positive numbers diff --git a/src/Data/ByteArray/Builder/Small/Unsafe.hs b/src/Data/ByteArray/Builder/Small/Unsafe.hs index f4feabc..7f7f4c9 100644 --- a/src/Data/ByteArray/Builder/Small/Unsafe.hs +++ b/src/Data/ByteArray/Builder/Small/Unsafe.hs @@ -36,6 +36,8 @@ module Data.ByteArray.Builder.Small.Unsafe , word32BE , word16BE , word8 + -- * Encode Floating-Point Types + , doubleDec ) where import Control.Monad.Primitive @@ -50,6 +52,7 @@ import GHC.Int import Data.Kind import GHC.TypeLits (KnownNat,Nat,type (+),natVal') import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) +import Control.Monad (when) import qualified Data.Primitive as PM @@ -126,6 +129,13 @@ append (Builder f) (Builder g) = Builder $ \arr off0 s0 -> case f arr off0 s0 of (# s1, r #) -> g arr r s1 +-- | Encode a double-floating-point number, using decimal notation or +-- scientific notation depending on the magnitude. This has undefined +-- behavior when representing @+inf@, @-inf@, and @NaN@. It will not +-- crash, but the generated numbers will be nonsense. +doubleDec :: Double -> Builder 32 +doubleDec (D# d) = Builder (\arr off0 s0 -> doubleDec# d arr off0 s0) + -- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. word64Dec :: Word64 -> Builder 19 @@ -339,3 +349,103 @@ unST (ST f) = f shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray (MutableByteArray arr) (I# sz) = primitive_ (shrinkMutableByteArray# arr sz) + +-- This is adapted from androider's code in https://stackoverflow.com/a/7097567 +-- The checks for infinity and NaN have been removed. Note that this is a little +-- inaccurate. This is very visible when encoding a number like 2.25, which +-- is perfectly represented as a IEEE 754 floating point number but is goofed +-- up by this function. +-- If you modify this function, please take a took at the resulting core. +-- It currently performs no boxing at all, and it would be nice to keep +-- it that way. +doubleDec# :: forall s. + Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) +{-# noinline doubleDec# #-} +doubleDec# d# marr# off# s0 = unIntST s0 $ do + let marr = MutableByteArray marr# + let d0 = D# d# + let off0 = I# off# + if d0 == 0 + then do + writeByteArray marr off0 (c2w '0') + pure (off0 + 1) + else do + let neg = d0 < 0 + off1 <- if neg + then do + writeByteArray marr off0 (c2w '-') + pure (off0 + 1) + else pure off0 + let d1 = abs d0 + let mag0 = floor (logBase10 d1) :: Int + let useExp = (mag0 >= 14 || (neg && mag0 >= 9) || mag0 <= (-9)) + -- This straightforward adaptation of the C code is awkward + -- in Haskell. Binding the triple where mag1 might not even + -- get used is strange. + let !(!d2,!mag1,!mag0A) = if useExp + then + let mag0' = if mag0 < 0 then mag0 - 1 else mag0 + in (d1 / (10.0 ** fromIntegral @Int @Double mag0'), mag0', 0) + else (d1,0,mag0) + let mag0B = if mag0A < 1 then 0 else mag0A + let goNum :: Double -> Int -> Int -> ST s Int + goNum !dA0 !mag !offA0 = if (dA0 > doublePrecision || mag >= 0) + then do + let weight = 10.0 ** (fromIntegral @Int @Double mag) + -- We should actually check weight with isinf here, + -- but we do not. + (dA1,offA1) <- if weight > 0 + then do + -- TODO: use a better floor function + let digit = ((floor :: Double -> Int) (dA0 / weight)) + let discard = fromIntegral @Int @Double digit * weight + writeByteArray marr offA0 + (fromIntegral @Int @Word8 (digit + ord '0')) + pure (dA0 - discard,offA0 + 1) + else pure (dA0,offA0) + offA2 <- if mag == 0 && dA1 > 0 + then do + writeByteArray marr offA1 (c2w '.') + pure (offA1 + 1) + else pure offA1 + goNum dA1 (mag - 1) offA2 + else pure offA0 + !off2 <- goNum d2 mag0B off1 + off3 <- if useExp + then do + writeByteArray marr off2 (c2w 'e') + !mag2 <- if mag1 > 0 + then do + writeByteArray marr (off2 + 1) (c2w '+') + pure mag1 + else do + writeByteArray marr (off2 + 1) (c2w '-') + pure (-mag1) + let goMag !mag !off = if mag > 0 + then do + let (q,r) = quotRem mag 10 + writeByteArray marr off (fromIntegral @Int @Word8 (ord '0' + r)) + goMag q (off + 1) + else pure off + !off3 <- goMag mag2 (off2 + 2) + reverseBytes marr (off2 + 2) (off3 - 1) + pure off3 + else pure off2 + pure off3 + +doublePrecision :: Double +doublePrecision = 0.00000000000001 + +unIntST :: State# s -> ST s Int -> (# State# s, Int# #) +{-# inline unIntST #-} +unIntST s0 (ST f) = case f s0 of + (# s1, I# i #) -> (# s1, i #) + +-- This is slightly inaccurate. I think this can actually cause +-- problems in some situations. The log10 function from C would +-- be better. The inaccuracy here cause the logarithm to be slightly +-- larger than it should be. There might actually be a simple way to +-- fix this by just using recursion to compute it. We just floor the +-- result anyway. Hmm... +logBase10 :: Double -> Double +logBase10 d = log d / 2.30258509299 diff --git a/test/Main.hs b/test/Main.hs index f362628..00ba39f 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -12,6 +12,7 @@ import Debug.Trace import Test.Tasty (defaultMain,testGroup,TestTree) import Test.QuickCheck ((===)) import Text.Printf (printf) +import Test.Tasty.HUnit ((@=?)) import qualified Data.ByteString.Builder as BB import qualified Data.Primitive as PM import qualified Data.List as L @@ -20,6 +21,7 @@ import qualified Test.Tasty.QuickCheck as TQC import qualified Test.QuickCheck as QC import qualified GHC.Exts as Exts import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Test.Tasty.HUnit as THU import qualified HexWord64 @@ -51,6 +53,24 @@ tests = testGroup "Tests" (runArray word64Dec (V.fromList xs)) === pack (foldMap show xs) + , THU.testCase "doubleDec-A" $ + pack (show (2 :: Int)) @=? run 1 (doubleDec 2.0) + , THU.testCase "doubleDec-B" $ + pack (show (2.5 :: Double)) @=? run 1 (doubleDec 2.5) + , THU.testCase "doubleDec-C" $ + pack ("1e+15") @=? run 1 (doubleDec 1e15) + , THU.testCase "doubleDec-D" $ + pack ("-42") @=? run 1 (doubleDec (-42)) + , THU.testCase "doubleDec-E" $ + pack ("-8.88888888888888e+14") @=? run 1 (doubleDec (-888888888888888.8888888)) + , THU.testCase "doubleDec-F" $ + pack ("42") @=? run 1 (doubleDec 42) + , THU.testCase "doubleDec-G" $ + pack ("0") @=? run 1 (doubleDec 0) + , THU.testCase "doubleDec-H" $ + pack ("0.5") @=? run 1 (doubleDec 0.5) + , THU.testCase "doubleDec-I" $ + pack ("-0.5") @=? run 1 (doubleDec (-0.5)) ] , testGroup "alternate" [ TQC.testProperty "HexWord64" $ \x y ->