Fix doubleDec

2020-12-18 13:03:04 -05:00 · 2020-12-18 13:03:04 -05:00 · 57e2c7b777
commit 57e2c7b777
parent 6b43fea3d5
5 changed files with 101 additions and 93 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
 `small-bytearray-builder` is now just a compatibility shim
 to ease the migration process.
 ## 0.3.7.1 -- 2020-??-??
 * Fix `doubleDec`, which was encoding small numbers incorrectly.
 ## 0.3.7.0 -- 2020-11-06
 * Fix build error in test suite.
--- a/bytebuild.cabal
+++ b/bytebuild.cabal
@ -1,6 +1,6 @@
 cabal-version: 2.2
 name: bytebuild
-version: 0.3.7.0
+version: 0.3.7.1
 synopsis: Serialize to a small byte arrays
 description:
  This is similar to the builder facilities provided by
@ -60,6 +60,7 @@ library
  ghc-options: -Wall -O2
  hs-source-dirs: src
  default-language: Haskell2010
  c-sources: cbits/bytebuild_custom.c
 test-suite test
  default-language: Haskell2010
--- a/cbits/bytebuild_custom.c
+++ b/cbits/bytebuild_custom.c
@ -0,0 +1,73 @@
 #include "Rts.h"
 #include <stdint.h>
 #define BYTEBUILD_DOUBLE_PRECISION 0.00000000000001
 HsInt bytebuild_paste_double(char *s0, HsInt off, double n) {
    char* s = s0 + off;
    // handle special cases
    if (n == 0 || isnan(n) || isinf(n)) {
        *s = '0';
        return 1;
    } else {
        int digit, m, m1;
        char *c = s;
        int neg = (n < 0);
        if (neg)
            n = -n;
        // calculate magnitude
        m = log10(n);
        int useExp = (m >= 14 || (neg && m >= 9) || m <= -9);
        if (neg)
            *(c++) = '-';
        // set up for scientific notation
        if (useExp) {
            if (m < 0)
               m -= 1.0;
            n = n / pow(10.0, m);
            m1 = m;
            m = 0;
        }
        if (m < 1.0) {
            m = 0;
        }
        // convert the number
        while (n > BYTEBUILD_DOUBLE_PRECISION || m >= 0) {
            double weight = pow(10.0, m);
            if (weight > 0 && !isinf(weight)) {
                digit = floor(n / weight);
                n -= (digit * weight);
                *(c++) = '0' + digit;
            }
            if (m == 0 && n > 0)
                *(c++) = '.';
            m--;
        }
        if (useExp) {
            // convert the exponent
            int i, j;
            *(c++) = 'e';
            if (m1 > 0) {
                *(c++) = '+';
            } else {
                *(c++) = '-';
                m1 = -m1;
            }
            m = 0;
            while (m1 > 0) {
                *(c++) = '0' + m1 % 10;
                m1 /= 10;
                m++;
            }
            c -= m;
            for (i = 0, j = m-1; i<j; i++, j--) {
                // swap without temporary
                c[i] ^= c[j];
                c[j] ^= c[i];
                c[i] ^= c[j];
            }
            c += m;
        }
        return (c - s);
    }
 }
--- a/src/Data/Bytes/Builder/Bounded.hs
+++ b/src/Data/Bytes/Builder/Bounded.hs
@ -9,6 +9,7 @@
 {-# language TypeApplications #-}
 {-# language TypeOperators #-}
 {-# language UnboxedTuples #-}
 {-# language UnliftedFFITypes #-}
 -- | The functions in this module are explict about the maximum number
 -- of bytes they require.
@ -113,6 +114,7 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
 import Data.WideWord (Word128(Word128),Word256(Word256))
 import GHC.Exts
 import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#))
 import GHC.IO (unsafeIOToST)
 import GHC.ST (ST(ST))
 import GHC.TypeLits (type (+))
 import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
@ -1032,100 +1034,12 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
 -- inaccurate. This is very visible when encoding a number like 2.25, which
 -- is perfectly represented as an IEEE 754 floating point number but is goofed
 -- up by this function.
 -- If you modify this function, please take a took at the resulting core.
 -- It currently performs no boxing at all, and it would be nice to keep
 -- it that way.
 doubleDec# :: forall s.
  Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)
-{-# noinline doubleDec# #-}
+doubleDec# d# marr# off# s0 =
-doubleDec# d# marr# off# s0 = unIntST s0 $ do
+  case unsafeIOToST (c_paste_double marr# off# d#) of
-  let marr = MutableByteArray marr#
+    ST f -> case f s0 of
-  let d0 = D# d#
+      (# s1, I# r #) -> (# s1, r #)
  let off0 = I# off#
  if d0 == 0
    then do
      writeByteArray marr off0 (c2w '0')
      pure (off0 + 1)
    else do
      let neg = d0 < 0
      off1 <- if neg
        then do
          writeByteArray marr off0 (c2w '-')
          pure (off0 + 1)
        else pure off0
      let d1 = abs d0
      let mag0 = floor (logBase10 d1) :: Int
      let useExp = (mag0 >= 14 || (neg && mag0 >= 9) || mag0 <= (-9))
      -- This straightforward adaptation of the C code is awkward
      -- in Haskell. Binding the triple where mag1 might not even
      -- get used is strange.
      let !(!d2,!mag1,!mag0A) = if useExp
            then
              let mag0' = if mag0 < 0 then mag0 - 1 else mag0
               in (d1 / (10.0 ** fromIntegral @Int @Double mag0'), mag0', 0)
            else (d1,0,mag0)
      let mag0B = if mag0A < 1 then 0 else mag0A
      let goNum :: Double -> Int -> Int -> ST s Int
          goNum !dA0 !mag !offA0 = if (dA0 > doublePrecision || mag >= 0)
            then do
              let weight = 10.0 ** (fromIntegral @Int @Double mag)
              -- We should actually check weight with isinf here,
              -- but we do not.
              (dA1,offA1) <- if weight > 0
                then do
                  -- TODO: use a better floor function
                  let digit = ((floor :: Double -> Int) (dA0 / weight))
                  let discard = fromIntegral @Int @Double digit * weight
                  writeByteArray marr offA0
                    (fromIntegral @Int @Word8 (digit + ord '0'))
                  pure (dA0 - discard,offA0 + 1)
                else pure (dA0,offA0)
              offA2 <- if mag == 0 && dA1 > 0
                then do
                  writeByteArray marr offA1 (c2w '.')
                  pure (offA1 + 1)
                else pure offA1
              goNum dA1 (mag - 1) offA2
            else pure offA0
      !off2 <- goNum d2 mag0B off1
      off3 <- if useExp
        then do
          writeByteArray marr off2 (c2w 'e')
          !mag2 <- if mag1 > 0
            then do
              writeByteArray marr (off2 + 1) (c2w '+')
              pure mag1
            else do
              writeByteArray marr (off2 + 1) (c2w '-')
              pure (-mag1)
          let goMag !mag !off = if mag > 0
                then do
                  let (q,r) = quotRem mag 10
                  writeByteArray marr off (fromIntegral @Int @Word8 (ord '0' + r))
                  goMag q (off + 1)
                else pure off
          !off3 <- goMag mag2 (off2 + 2)
          reverseBytes marr (off2 + 2) (off3 - 1)
          pure off3
        else pure off2
      pure off3
 doublePrecision :: Double
 doublePrecision = 0.00000000000001
 unIntST :: State# s -> ST s Int -> (# State# s, Int# #)
 {-# inline unIntST #-}
 unIntST s0 (ST f) = case f s0 of
  (# s1, I# i #) -> (# s1, i #)
 -- This is slightly inaccurate. I think this can actually cause
 -- problems in some situations. The log10 function from C would
 -- be better. The inaccuracy here cause the logarithm to be slightly
 -- larger than it should be. There might actually be a simple way to
 -- fix this by just using recursion to compute it. We just floor the
 -- result anyway. Hmm...
 logBase10 :: Double -> Double
 logBase10 d = log d / 2.30258509299
 -- Based on C code from https://stackoverflow.com/a/5558614
 -- For numbers less than 1073741829, this gives a correct answer.
@ -1134,3 +1048,7 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32
 unsafeWordToWord8 :: Word -> Word8
 unsafeWordToWord8 (W# w) = W8# w
 foreign import ccall unsafe "bytebuild_paste_double" c_paste_double ::
  MutableByteArray# s -> Int# -> Double# -> IO Int
--- a/test/Main.hs
+++ b/test/Main.hs
@ -1,4 +1,5 @@
 {-# language BangPatterns #-}
 {-# language NumericUnderscores #-}
 {-# language ScopedTypeVariables #-}
 {-# language TypeApplications #-}
 {-# language OverloadedStrings #-}
@ -141,6 +142,8 @@ tests = testGroup "Tests"
        pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
    , THU.testCase "doubleDec-K" $
        pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
    , THU.testCase "doubleDec-L" $
        AsciiByteArray (pack ("6.66666666666666e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000)))
    , THU.testCase "shortTextJsonString-A" $
        pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
    , THU.testCase "shortTextJsonString-B" $
@ -316,6 +319,15 @@ runConcat n = Chunks.concatU . run n
 c2w :: Char -> Word8
 c2w = fromIntegral . ord
 -- Just a wrapper with a show instance that displays as ascii when possible.
 newtype AsciiByteArray = AsciiByteArray ByteArray
  deriving (Eq)
 instance Show AsciiByteArray where
  show (AsciiByteArray b) = if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b)
    then Bytes.toLatinString (Bytes.fromByteArray b)
    else show (show b)
 instance Arbitrary Word128 where
  arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary