Fix doubleDec

2020-12-18 13:03:04 -05:00 · 2020-12-18 13:03:04 -05:00 · 57e2c7b777
commit 57e2c7b777
parent 6b43fea3d5
5 changed files with 101 additions and 93 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
 `small-bytearray-builder` is now just a compatibility shim
 to ease the migration process.

+## 0.3.7.1 -- 2020-??-??
+
+* Fix `doubleDec`, which was encoding small numbers incorrectly.
+
 ## 0.3.7.0 -- 2020-11-06

 * Fix build error in test suite.
--- a/bytebuild.cabal
+++ b/bytebuild.cabal
@ -1,6 +1,6 @@
 cabal-version: 2.2
 name: bytebuild
-version: 0.3.7.0
+version: 0.3.7.1
 synopsis: Serialize to a small byte arrays
 description:
  This is similar to the builder facilities provided by
@ -60,6 +60,7 @@ library
  ghc-options: -Wall -O2
  hs-source-dirs: src
  default-language: Haskell2010
+  c-sources: cbits/bytebuild_custom.c

 test-suite test
  default-language: Haskell2010
--- a/cbits/bytebuild_custom.c
+++ b/cbits/bytebuild_custom.c
@ -0,0 +1,73 @@
+#include "Rts.h"
+#include <math.h>
+#include <stdint.h>
+
+#define BYTEBUILD_DOUBLE_PRECISION 0.00000000000001
+
+/*
+ * Write the decimal rendering of n into the buffer s0, starting at byte
+ * offset off, and return the number of bytes written (a length, not the
+ * end offset). No terminator is appended.
+ *
+ * Zero, NaN and the infinities are all rendered as the single byte '0'.
+ * Scientific notation (mantissa, 'e', sign, exponent digits) is used when
+ * the magnitude is at least 1e14, at least 1e9 for negative numbers, or at
+ * most 1e-9. Fractional digits are produced until the remainder drops to
+ * BYTEBUILD_DOUBLE_PRECISION or below.
+ */
+HsInt bytebuild_paste_double(char *s0, HsInt off, double n) {
+    char* s = s0 + off;
+    // Special cases: zero, NaN and +/-infinity all collapse to "0".
+    if (n == 0 || isnan(n) || isinf(n)) {
+        *s = '0';
+        return 1;
+    } else {
+        int digit, m, m1 = 0;
+        char *c = s;
+        int neg = (n < 0);
+        if (neg)
+            n = -n;
+        // Decimal magnitude. The conversion to int truncates toward zero.
+        m = (int)log10(n);
+        int useExp = (m >= 14 || (neg && m >= 9) || m <= -9);
+        if (neg)
+            *(c++) = '-';
+        // set up for scientific notation
+        if (useExp) {
+            // Truncation rounded a negative logarithm up, so step down one
+            // decade to reach the exponent of the leading digit.
+            if (m < 0)
+               m -= 1;
+            n = n / pow(10.0, m);
+            // When log10(n) truncated to a whole number (for example
+            // n == 1e-10) the step above overshoots and leaves a mantissa
+            // of 10, which would be emitted as ':' ('0' + 10). Bring the
+            // mantissa back below 10 and adjust the exponent to match.
+            if (n >= 10.0) {
+                n /= 10.0;
+                m += 1;
+            }
+            m1 = m;
+            m = 0;
+        }
+        if (m < 1) {
+            m = 0;
+        }
+        // Convert the number one decimal place at a time, from the most
+        // significant place (10^m) downwards.
+        while (n > BYTEBUILD_DOUBLE_PRECISION || m >= 0) {
+            double weight = pow(10.0, m);
+            if (weight > 0 && !isinf(weight)) {
+                digit = floor(n / weight);
+                // Rounding error must never push the output outside '0'..'9'.
+                if (digit > 9)
+                    digit = 9;
+                if (digit < 0)
+                    digit = 0;
+                n -= (digit * weight);
+                *(c++) = '0' + digit;
+            }
+            // Only emit the decimal point when the loop is going to produce
+            // at least one fractional digit; otherwise a remainder below the
+            // precision cutoff would leave a dangling '.'.
+            if (m == 0 && n > BYTEBUILD_DOUBLE_PRECISION)
+                *(c++) = '.';
+            m--;
+        }
+        if (useExp) {
+            // Emit the exponent: 'e', a sign, then the decimal digits.
+            char *first, *last;
+            *(c++) = 'e';
+            if (m1 > 0) {
+                *(c++) = '+';
+            } else {
+                *(c++) = '-';
+                m1 = -m1;
+            }
+            // Digits come out least-significant first ...
+            first = c;
+            while (m1 > 0) {
+                *(c++) = '0' + m1 % 10;
+                m1 /= 10;
+            }
+            // ... so reverse them in place.
+            for (last = c - 1; first < last; first++, last--) {
+                char tmp = *first;
+                *first = *last;
+                *last = tmp;
+            }
+        }
+        return (c - s);
+    }
+}
--- a/src/Data/Bytes/Builder/Bounded.hs
+++ b/src/Data/Bytes/Builder/Bounded.hs
@ -9,6 +9,7 @@
 {-# language TypeApplications #-}
 {-# language TypeOperators #-}
 {-# language UnboxedTuples #-}
+{-# language UnliftedFFITypes #-}

 -- | The functions in this module are explicit about the maximum number
 -- of bytes they require.
@ -113,6 +114,7 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
 import Data.WideWord (Word128(Word128),Word256(Word256))
 import GHC.Exts
 import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#))
+import GHC.IO (unsafeIOToST)
 import GHC.ST (ST(ST))
 import GHC.TypeLits (type (+))
 import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
@ -1032,100 +1034,12 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
 -- inaccurate. This is very visible when encoding a number like 2.25, which
 -- is perfectly represented as an IEEE 754 floating point number but is goofed
 -- up by this function.
-- If you modify this function, please take a took at the resulting core.
-- It currently performs no boxing at all, and it would be nice to keep
-- it that way.
 doubleDec# :: forall s.
  Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)
-{-# noinline doubleDec# #-}
-doubleDec# d# marr# off# s0 = unIntST s0 $ do
-  let marr = MutableByteArray marr#
-  let d0 = D# d#
-  let off0 = I# off#
-  if d0 == 0
-    then do
-      writeByteArray marr off0 (c2w '0')
-      pure (off0 + 1)
-    else do
-      let neg = d0 < 0
-      off1 <- if neg
-        then do
-          writeByteArray marr off0 (c2w '-')
-          pure (off0 + 1)
-        else pure off0
-      let d1 = abs d0
-      let mag0 = floor (logBase10 d1) :: Int
-      let useExp = (mag0 >= 14 || (neg && mag0 >= 9) || mag0 <= (-9))
-      -- This straightforward adaptation of the C code is awkward
-      -- in Haskell. Binding the triple where mag1 might not even
-      -- get used is strange.
-      let !(!d2,!mag1,!mag0A) = if useExp
-            then
-              let mag0' = if mag0 < 0 then mag0 - 1 else mag0
-               in (d1 / (10.0 ** fromIntegral @Int @Double mag0'), mag0', 0)
-            else (d1,0,mag0)
-      let mag0B = if mag0A < 1 then 0 else mag0A
-      let goNum :: Double -> Int -> Int -> ST s Int
-          goNum !dA0 !mag !offA0 = if (dA0 > doublePrecision || mag >= 0)
-            then do
-              let weight = 10.0 ** (fromIntegral @Int @Double mag)
-              -- We should actually check weight with isinf here,
-              -- but we do not.
-              (dA1,offA1) <- if weight > 0
-                then do
-                  -- TODO: use a better floor function
-                  let digit = ((floor :: Double -> Int) (dA0 / weight))
-                  let discard = fromIntegral @Int @Double digit * weight
-                  writeByteArray marr offA0
-                    (fromIntegral @Int @Word8 (digit + ord '0'))
-                  pure (dA0 - discard,offA0 + 1)
-                else pure (dA0,offA0)
-              offA2 <- if mag == 0 && dA1 > 0
-                then do
-                  writeByteArray marr offA1 (c2w '.')
-                  pure (offA1 + 1)
-                else pure offA1
-              goNum dA1 (mag - 1) offA2
-            else pure offA0
-      !off2 <- goNum d2 mag0B off1
-      off3 <- if useExp
-        then do
-          writeByteArray marr off2 (c2w 'e')
-          !mag2 <- if mag1 > 0
-            then do
-              writeByteArray marr (off2 + 1) (c2w '+')
-              pure mag1
-            else do
-              writeByteArray marr (off2 + 1) (c2w '-')
-              pure (-mag1)
-          let goMag !mag !off = if mag > 0
-                then do
-                  let (q,r) = quotRem mag 10
-                  writeByteArray marr off (fromIntegral @Int @Word8 (ord '0' + r))
-                  goMag q (off + 1)
-                else pure off
-          !off3 <- goMag mag2 (off2 + 2)
-          reverseBytes marr (off2 + 2) (off3 - 1)
-          pure off3
-        else pure off2
-      pure off3
-
-doublePrecision :: Double
-doublePrecision = 0.00000000000001
-
-unIntST :: State# s -> ST s Int -> (# State# s, Int# #)
-{-# inline unIntST #-}
-unIntST s0 (ST f) = case f s0 of
-  (# s1, I# i #) -> (# s1, i #)
-
-- This is slightly inaccurate. I think this can actually cause
-- problems in some situations. The log10 function from C would
-- be better. The inaccuracy here cause the logarithm to be slightly
-- larger than it should be. There might actually be a simple way to
-- fix this by just using recursion to compute it. We just floor the
-- result anyway. Hmm...
-logBase10 :: Double -> Double
-logBase10 d = log d / 2.30258509299
+doubleDec# d# marr# off# s0 =
+  -- The encoding itself is done by the C routine bound as c_paste_double,
+  -- which writes into marr# starting at off#.
+  case unsafeIOToST (c_paste_double marr# off# d#) of
+    ST f -> case f s0 of
+      -- The C routine reports how many bytes it wrote. This function, like
+      -- the pure-Haskell implementation it replaces, must return the offset
+      -- just past the encoded number, so add the starting offset back in.
+      (# s1, I# len #) -> (# s1, off# +# len #)

 -- Based on C code from https://stackoverflow.com/a/5558614
 -- For numbers less than 1073741829, this gives a correct answer.
@ -1134,3 +1048,7 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32

 unsafeWordToWord8 :: Word -> Word8
 unsafeWordToWord8 (W# w) = W8# w
+
+-- Binding to the C function bytebuild_paste_double. The arguments are the
+-- destination buffer, the byte offset at which to start writing, and the
+-- number to encode. The result is the number of bytes written (a length,
+-- not the resulting offset). Imported as an unsafe ccall.
+foreign import ccall unsafe "bytebuild_paste_double" c_paste_double ::
+  MutableByteArray# s -> Int# -> Double# -> IO Int
+
--- a/test/Main.hs
+++ b/test/Main.hs
@ -1,4 +1,5 @@
 {-# language BangPatterns #-}
+{-# language NumericUnderscores #-}
 {-# language ScopedTypeVariables #-}
 {-# language TypeApplications #-}
 {-# language OverloadedStrings #-}
@ -141,6 +142,8 @@ tests = testGroup "Tests"
        pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
    , THU.testCase "doubleDec-K" $
        pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
+    , THU.testCase "doubleDec-L" $
+        AsciiByteArray (pack ("6.66666666666666e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000)))
    , THU.testCase "shortTextJsonString-A" $
        pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
    , THU.testCase "shortTextJsonString-B" $
@ -316,6 +319,15 @@ runConcat n = Chunks.concatU . run n
 c2w :: Char -> Word8
 c2w = fromIntegral . ord

+-- Wrapper around ByteArray whose Show instance renders the contents as
+-- plain text when every byte is printable ASCII, which makes failing test
+-- output easier to read.
+newtype AsciiByteArray = AsciiByteArray ByteArray
+  deriving (Eq)
+
+instance Show AsciiByteArray where
+  show (AsciiByteArray b)
+    | Bytes.all isPrintableAscii bs = Bytes.toLatinString bs
+    | otherwise = show (show b)
+    where
+    bs = Bytes.fromByteArray b
+    -- Printable ASCII: space (32) through tilde (126).
+    isPrintableAscii :: Word8 -> Bool
+    isPrintableAscii w = w >= 32 && w < 127
+
 instance Arbitrary Word128 where
  arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary