Fix doubleDec
This commit is contained in:
parent
6b43fea3d5
commit
57e2c7b777
5 changed files with 101 additions and 93 deletions
|
@ -5,6 +5,10 @@ Note: Prior to version 0.3.4.0, this library was named
|
||||||
`small-bytearray-builder` is now just a compatibility shim
|
`small-bytearray-builder` is now just a compatibility shim
|
||||||
to ease the migration process.
|
to ease the migration process.
|
||||||
|
|
||||||
|
## 0.3.7.1 -- 2020-??-??
|
||||||
|
|
||||||
|
* Fix `doubleDec`, which was encoding small numbers incorrectly.
|
||||||
|
|
||||||
## 0.3.7.0 -- 2020-11-06
|
## 0.3.7.0 -- 2020-11-06
|
||||||
|
|
||||||
* Fix build error in test suite.
|
* Fix build error in test suite.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
cabal-version: 2.2
|
cabal-version: 2.2
|
||||||
name: bytebuild
|
name: bytebuild
|
||||||
version: 0.3.7.0
|
version: 0.3.7.1
|
||||||
synopsis: Serialize to a small byte arrays
|
synopsis: Serialize to a small byte arrays
|
||||||
description:
|
description:
|
||||||
This is similar to the builder facilities provided by
|
This is similar to the builder facilities provided by
|
||||||
|
@ -60,6 +60,7 @@ library
|
||||||
ghc-options: -Wall -O2
|
ghc-options: -Wall -O2
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
c-sources: cbits/bytebuild_custom.c
|
||||||
|
|
||||||
test-suite test
|
test-suite test
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
73
cbits/bytebuild_custom.c
Normal file
73
cbits/bytebuild_custom.c
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
#include "Rts.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define BYTEBUILD_DOUBLE_PRECISION 0.00000000000001
|
||||||
|
|
||||||
|
HsInt bytebuild_paste_double(char *s0, HsInt off, double n) {
|
||||||
|
char* s = s0 + off;
|
||||||
|
// handle special cases
|
||||||
|
if (n == 0 || isnan(n) || isinf(n)) {
|
||||||
|
*s = '0';
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
int digit, m, m1;
|
||||||
|
char *c = s;
|
||||||
|
int neg = (n < 0);
|
||||||
|
if (neg)
|
||||||
|
n = -n;
|
||||||
|
// calculate magnitude
|
||||||
|
m = log10(n);
|
||||||
|
int useExp = (m >= 14 || (neg && m >= 9) || m <= -9);
|
||||||
|
if (neg)
|
||||||
|
*(c++) = '-';
|
||||||
|
// set up for scientific notation
|
||||||
|
if (useExp) {
|
||||||
|
if (m < 0)
|
||||||
|
m -= 1.0;
|
||||||
|
n = n / pow(10.0, m);
|
||||||
|
m1 = m;
|
||||||
|
m = 0;
|
||||||
|
}
|
||||||
|
if (m < 1.0) {
|
||||||
|
m = 0;
|
||||||
|
}
|
||||||
|
// convert the number
|
||||||
|
while (n > BYTEBUILD_DOUBLE_PRECISION || m >= 0) {
|
||||||
|
double weight = pow(10.0, m);
|
||||||
|
if (weight > 0 && !isinf(weight)) {
|
||||||
|
digit = floor(n / weight);
|
||||||
|
n -= (digit * weight);
|
||||||
|
*(c++) = '0' + digit;
|
||||||
|
}
|
||||||
|
if (m == 0 && n > 0)
|
||||||
|
*(c++) = '.';
|
||||||
|
m--;
|
||||||
|
}
|
||||||
|
if (useExp) {
|
||||||
|
// convert the exponent
|
||||||
|
int i, j;
|
||||||
|
*(c++) = 'e';
|
||||||
|
if (m1 > 0) {
|
||||||
|
*(c++) = '+';
|
||||||
|
} else {
|
||||||
|
*(c++) = '-';
|
||||||
|
m1 = -m1;
|
||||||
|
}
|
||||||
|
m = 0;
|
||||||
|
while (m1 > 0) {
|
||||||
|
*(c++) = '0' + m1 % 10;
|
||||||
|
m1 /= 10;
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
c -= m;
|
||||||
|
for (i = 0, j = m-1; i<j; i++, j--) {
|
||||||
|
// swap without temporary
|
||||||
|
c[i] ^= c[j];
|
||||||
|
c[j] ^= c[i];
|
||||||
|
c[i] ^= c[j];
|
||||||
|
}
|
||||||
|
c += m;
|
||||||
|
}
|
||||||
|
return (c - s);
|
||||||
|
}
|
||||||
|
}
|
|
@ -9,6 +9,7 @@
|
||||||
{-# language TypeApplications #-}
|
{-# language TypeApplications #-}
|
||||||
{-# language TypeOperators #-}
|
{-# language TypeOperators #-}
|
||||||
{-# language UnboxedTuples #-}
|
{-# language UnboxedTuples #-}
|
||||||
|
{-# language UnliftedFFITypes #-}
|
||||||
|
|
||||||
-- | The functions in this module are explict about the maximum number
|
-- | The functions in this module are explict about the maximum number
|
||||||
-- of bytes they require.
|
-- of bytes they require.
|
||||||
|
@ -113,6 +114,7 @@ import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..))
|
||||||
import Data.WideWord (Word128(Word128),Word256(Word256))
|
import Data.WideWord (Word128(Word128),Word256(Word256))
|
||||||
import GHC.Exts
|
import GHC.Exts
|
||||||
import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#))
|
import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#))
|
||||||
|
import GHC.IO (unsafeIOToST)
|
||||||
import GHC.ST (ST(ST))
|
import GHC.ST (ST(ST))
|
||||||
import GHC.TypeLits (type (+))
|
import GHC.TypeLits (type (+))
|
||||||
import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
|
import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#))
|
||||||
|
@ -1032,100 +1034,12 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) =
|
||||||
-- inaccurate. This is very visible when encoding a number like 2.25, which
|
-- inaccurate. This is very visible when encoding a number like 2.25, which
|
||||||
-- is perfectly represented as an IEEE 754 floating point number but is goofed
|
-- is perfectly represented as an IEEE 754 floating point number but is goofed
|
||||||
-- up by this function.
|
-- up by this function.
|
||||||
-- If you modify this function, please take a took at the resulting core.
|
|
||||||
-- It currently performs no boxing at all, and it would be nice to keep
|
|
||||||
-- it that way.
|
|
||||||
doubleDec# :: forall s.
|
doubleDec# :: forall s.
|
||||||
Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)
|
Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)
|
||||||
{-# noinline doubleDec# #-}
|
doubleDec# d# marr# off# s0 =
|
||||||
doubleDec# d# marr# off# s0 = unIntST s0 $ do
|
case unsafeIOToST (c_paste_double marr# off# d#) of
|
||||||
let marr = MutableByteArray marr#
|
ST f -> case f s0 of
|
||||||
let d0 = D# d#
|
(# s1, I# r #) -> (# s1, r #)
|
||||||
let off0 = I# off#
|
|
||||||
if d0 == 0
|
|
||||||
then do
|
|
||||||
writeByteArray marr off0 (c2w '0')
|
|
||||||
pure (off0 + 1)
|
|
||||||
else do
|
|
||||||
let neg = d0 < 0
|
|
||||||
off1 <- if neg
|
|
||||||
then do
|
|
||||||
writeByteArray marr off0 (c2w '-')
|
|
||||||
pure (off0 + 1)
|
|
||||||
else pure off0
|
|
||||||
let d1 = abs d0
|
|
||||||
let mag0 = floor (logBase10 d1) :: Int
|
|
||||||
let useExp = (mag0 >= 14 || (neg && mag0 >= 9) || mag0 <= (-9))
|
|
||||||
-- This straightforward adaptation of the C code is awkward
|
|
||||||
-- in Haskell. Binding the triple where mag1 might not even
|
|
||||||
-- get used is strange.
|
|
||||||
let !(!d2,!mag1,!mag0A) = if useExp
|
|
||||||
then
|
|
||||||
let mag0' = if mag0 < 0 then mag0 - 1 else mag0
|
|
||||||
in (d1 / (10.0 ** fromIntegral @Int @Double mag0'), mag0', 0)
|
|
||||||
else (d1,0,mag0)
|
|
||||||
let mag0B = if mag0A < 1 then 0 else mag0A
|
|
||||||
let goNum :: Double -> Int -> Int -> ST s Int
|
|
||||||
goNum !dA0 !mag !offA0 = if (dA0 > doublePrecision || mag >= 0)
|
|
||||||
then do
|
|
||||||
let weight = 10.0 ** (fromIntegral @Int @Double mag)
|
|
||||||
-- We should actually check weight with isinf here,
|
|
||||||
-- but we do not.
|
|
||||||
(dA1,offA1) <- if weight > 0
|
|
||||||
then do
|
|
||||||
-- TODO: use a better floor function
|
|
||||||
let digit = ((floor :: Double -> Int) (dA0 / weight))
|
|
||||||
let discard = fromIntegral @Int @Double digit * weight
|
|
||||||
writeByteArray marr offA0
|
|
||||||
(fromIntegral @Int @Word8 (digit + ord '0'))
|
|
||||||
pure (dA0 - discard,offA0 + 1)
|
|
||||||
else pure (dA0,offA0)
|
|
||||||
offA2 <- if mag == 0 && dA1 > 0
|
|
||||||
then do
|
|
||||||
writeByteArray marr offA1 (c2w '.')
|
|
||||||
pure (offA1 + 1)
|
|
||||||
else pure offA1
|
|
||||||
goNum dA1 (mag - 1) offA2
|
|
||||||
else pure offA0
|
|
||||||
!off2 <- goNum d2 mag0B off1
|
|
||||||
off3 <- if useExp
|
|
||||||
then do
|
|
||||||
writeByteArray marr off2 (c2w 'e')
|
|
||||||
!mag2 <- if mag1 > 0
|
|
||||||
then do
|
|
||||||
writeByteArray marr (off2 + 1) (c2w '+')
|
|
||||||
pure mag1
|
|
||||||
else do
|
|
||||||
writeByteArray marr (off2 + 1) (c2w '-')
|
|
||||||
pure (-mag1)
|
|
||||||
let goMag !mag !off = if mag > 0
|
|
||||||
then do
|
|
||||||
let (q,r) = quotRem mag 10
|
|
||||||
writeByteArray marr off (fromIntegral @Int @Word8 (ord '0' + r))
|
|
||||||
goMag q (off + 1)
|
|
||||||
else pure off
|
|
||||||
!off3 <- goMag mag2 (off2 + 2)
|
|
||||||
reverseBytes marr (off2 + 2) (off3 - 1)
|
|
||||||
pure off3
|
|
||||||
else pure off2
|
|
||||||
pure off3
|
|
||||||
|
|
||||||
doublePrecision :: Double
|
|
||||||
doublePrecision = 0.00000000000001
|
|
||||||
|
|
||||||
unIntST :: State# s -> ST s Int -> (# State# s, Int# #)
|
|
||||||
{-# inline unIntST #-}
|
|
||||||
unIntST s0 (ST f) = case f s0 of
|
|
||||||
(# s1, I# i #) -> (# s1, i #)
|
|
||||||
|
|
||||||
-- This is slightly inaccurate. I think this can actually cause
|
|
||||||
-- problems in some situations. The log10 function from C would
|
|
||||||
-- be better. The inaccuracy here cause the logarithm to be slightly
|
|
||||||
-- larger than it should be. There might actually be a simple way to
|
|
||||||
-- fix this by just using recursion to compute it. We just floor the
|
|
||||||
-- result anyway. Hmm...
|
|
||||||
logBase10 :: Double -> Double
|
|
||||||
logBase10 d = log d / 2.30258509299
|
|
||||||
|
|
||||||
-- Based on C code from https://stackoverflow.com/a/5558614
|
-- Based on C code from https://stackoverflow.com/a/5558614
|
||||||
-- For numbers less than 1073741829, this gives a correct answer.
|
-- For numbers less than 1073741829, this gives a correct answer.
|
||||||
|
@ -1134,3 +1048,7 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32
|
||||||
|
|
||||||
unsafeWordToWord8 :: Word -> Word8
|
unsafeWordToWord8 :: Word -> Word8
|
||||||
unsafeWordToWord8 (W# w) = W8# w
|
unsafeWordToWord8 (W# w) = W8# w
|
||||||
|
|
||||||
|
foreign import ccall unsafe "bytebuild_paste_double" c_paste_double ::
|
||||||
|
MutableByteArray# s -> Int# -> Double# -> IO Int
|
||||||
|
|
||||||
|
|
12
test/Main.hs
12
test/Main.hs
|
@ -1,4 +1,5 @@
|
||||||
{-# language BangPatterns #-}
|
{-# language BangPatterns #-}
|
||||||
|
{-# language NumericUnderscores #-}
|
||||||
{-# language ScopedTypeVariables #-}
|
{-# language ScopedTypeVariables #-}
|
||||||
{-# language TypeApplications #-}
|
{-# language TypeApplications #-}
|
||||||
{-# language OverloadedStrings #-}
|
{-# language OverloadedStrings #-}
|
||||||
|
@ -141,6 +142,8 @@ tests = testGroup "Tests"
|
||||||
pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
|
pack ("999999999") @=? runConcat 1 (doubleDec 999999999)
|
||||||
, THU.testCase "doubleDec-K" $
|
, THU.testCase "doubleDec-K" $
|
||||||
pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
|
pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999))
|
||||||
|
, THU.testCase "doubleDec-L" $
|
||||||
|
AsciiByteArray (pack ("6.66666666666666e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000)))
|
||||||
, THU.testCase "shortTextJsonString-A" $
|
, THU.testCase "shortTextJsonString-A" $
|
||||||
pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
|
pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello")
|
||||||
, THU.testCase "shortTextJsonString-B" $
|
, THU.testCase "shortTextJsonString-B" $
|
||||||
|
@ -316,6 +319,15 @@ runConcat n = Chunks.concatU . run n
|
||||||
c2w :: Char -> Word8
|
c2w :: Char -> Word8
|
||||||
c2w = fromIntegral . ord
|
c2w = fromIntegral . ord
|
||||||
|
|
||||||
|
-- Just a wrapper with a show instance that displays as ascii when possible.
|
||||||
|
newtype AsciiByteArray = AsciiByteArray ByteArray
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
|
instance Show AsciiByteArray where
|
||||||
|
show (AsciiByteArray b) = if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b)
|
||||||
|
then Bytes.toLatinString (Bytes.fromByteArray b)
|
||||||
|
else show (show b)
|
||||||
|
|
||||||
instance Arbitrary Word128 where
|
instance Arbitrary Word128 where
|
||||||
arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary
|
arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue