From 2b85e2cb849b9ac7a4472f3c2dcb145e70624b42 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Mon, 30 Dec 2019 20:14:15 -0500 Subject: [PATCH] Add function for zero-padded decimal encoding of two digit numers --- CHANGELOG.md | 1 + src/Data/ByteArray/Builder/Bounded.hs | 28 ++++++++++++++++++++++++--- test/Main.hs | 9 +++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 233c471..8e2280e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * Add `putManyConsLength`, useful for chunked HTTP encoding. * Add `runOnto` * Add `Data.Bytes.Chunks.length` +* Add `wordPaddedTwoDigitDec` ## 0.3.1.0 -- 2019-11-20 diff --git a/src/Data/ByteArray/Builder/Bounded.hs b/src/Data/ByteArray/Builder/Bounded.hs index fb1e843..508bc47 100644 --- a/src/Data/ByteArray/Builder/Bounded.hs +++ b/src/Data/ByteArray/Builder/Bounded.hs @@ -51,6 +51,8 @@ module Data.ByteArray.Builder.Bounded , word8LowerHex , ascii , char + -- ** Native + , wordPaddedTwoDigitDec -- ** Machine-Readable -- *** One , word8 @@ -522,6 +524,22 @@ word8LowerHex# w# where w = W# w# +-- | Encode a number less than 100 as a decimal number, zero-padding it to +-- two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and +-- 73 is encoded as @73@. +-- +-- Precondition: Argument less than 100. Failure to satisfy this precondition +-- will not result in a segfault, but the resulting bytes are undefined. The +-- implement uses a heuristic for division that is inaccurate for large +-- numbers. +wordPaddedTwoDigitDec :: Word -> Builder 2 +wordPaddedTwoDigitDec !w = Unsafe.construct $ \arr off -> do + let d1 = approxDiv10 w + d2 = w - (10 * d1) + writeByteArray arr off (unsafeWordToWord8 (d1 + 48)) + writeByteArray arr (off + 1) (unsafeWordToWord8 (d2 + 48)) + pure (off + 2) + -- | Encode an ASCII character. -- Precondition: Input must be an ASCII character. This is not checked. ascii :: Char -> Builder 1 @@ -561,9 +579,6 @@ char c codepoint :: Word codepoint = fromIntegral (ord c) - unsafeWordToWord8 :: Word -> Word8 - unsafeWordToWord8 (W# w) = W8# w - -- precondition: codepoint is less than 0x800 byteTwoOne :: Word -> Word byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 @@ -807,3 +822,10 @@ unIntST s0 (ST f) = case f s0 of -- result anyway. Hmm... logBase10 :: Double -> Double logBase10 d = log d / 2.30258509299 + +-- Based on C code from https://stackoverflow.com/a/5558614 +approxDiv10 :: Word -> Word +approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 + +unsafeWordToWord8 :: Word -> Word8 +unsafeWordToWord8 (W# w) = W8# w diff --git a/test/Main.hs b/test/Main.hs index ddd738e..b8f2b64 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -64,6 +64,10 @@ tests = testGroup "Tests" runConcat 1 (word64PaddedUpperHex w) === pack (showWord64PaddedUpperHex w) + , TQC.testProperty "wordPaddedTwoDigitDec" $ TQC.forAll (TQC.choose (0,99)) $ \w -> + Bounded.run Nat.two (Bounded.wordPaddedTwoDigitDec w) + === + pack (zeroPadL 2 (show w)) , TQC.testProperty "word8Dec" $ \w -> runConcat 1 (word8Dec w) === @@ -278,3 +282,8 @@ c2w = fromIntegral . ord instance Arbitrary Word128 where arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary + +zeroPadL :: Int -> String -> String +zeroPadL n s + | length s < n = replicate (n - length s) '0' ++ s + | otherwise = s