first
This commit is contained in:
commit
0b41af265e
26 changed files with 1103 additions and 0 deletions
548
ips.lhs
Normal file
548
ips.lhs
Normal file
|
@ -0,0 +1,548 @@
|
|||
---
|
||||
title: applying patches
|
||||
...
|
||||
|
||||
stuff like rom hacks are usually distributed in one of two formats:
|
||||
[IPS][] or [BPS][].
|
||||
|
||||
[IPS]: http://fileformats.archiveteam.org/wiki/IPS_(binary_patch_format)
|
||||
[BPS]: https://github.com/blakesmith/rombp/blob/master/docs/bps_spec.md
|
||||
|
||||
> import Control.Applicative
|
||||
> import Control.Monad
|
||||
> import Control.Monad.Except
|
||||
> import Control.Monad.State
|
||||
> import Data.Bits
|
||||
> import Data.ByteString (ByteString)
|
||||
> import Data.ByteString qualified as BS
|
||||
> import Data.Word
|
||||
> import Numeric.Natural
|
||||
|
||||
and for tests:
|
||||
|
||||
<!--
|
||||
#ifdef TESTS
|
||||
-->
|
||||
|
||||
> import Numeric
|
||||
> import Data.Either
|
||||
> import Test.QuickCheck
|
||||
> import GHC.Generics
|
||||
|
||||
<!--
|
||||
#endif
|
||||
-->
|
||||
|
||||
|
||||
== basic parsers
|
||||
|
||||
i could just use attoparsec or something, and in the original code i did, but
|
||||
that's not really needed for a program like this, so let's make our own. it's
|
||||
good enough for these purposes. the error messages are bad, but what are you
|
||||
going to do, hand-edit an IPS file???
|
||||
|
||||
a parser has read-write access to the input, a `ByteString`; and the ability to
|
||||
throw errors (string messages are fine for this).
|
||||
|
||||
> newtype Parser a = Parser (StateT ByteString (Except String) a)
|
||||
> deriving (Functor, Applicative, Alternative, Monad, MonadPlus)
|
||||
|
||||
when running a parser we want to check that if it was successful, then it did
|
||||
actually consume all of the input. but not doing so, and returning the remainder
|
||||
instead, will be useful for tests.
|
||||
|
||||
> runParserPartial :: Parser a -> ByteString -> Either String (a, ByteString)
|
||||
> runParserPartial (Parser p) str = runExcept (runStateT p str)
|
||||
>
|
||||
> runParser :: Parser a -> ByteString -> Either String a
|
||||
> runParser p str = do
|
||||
> (result, rest) <- runParserPartial p str
|
||||
> if BS.null rest then
|
||||
> Right result
|
||||
> else
|
||||
> Left ("junk after EOF: " ++ show rest)
|
||||
|
||||
<details>
|
||||
<summary>testing parser functions</summary>
|
||||
<!--
|
||||
#ifdef TESTS
|
||||
-->
|
||||
|
||||
to test these parsers we can generate random inputs and check that they do the
|
||||
right thing. a test input is a byte string, but printed with each byte in hex.
|
||||
|
||||
> newtype ParserInput = ParserInput { fromParserInput :: ByteString }
|
||||
>
|
||||
> instance Show ParserInput where
|
||||
> show = unwords . map showHex2 . piToList
|
||||
>
|
||||
> showHex2 :: Word8 -> String
|
||||
> showHex2 n = case showHex n "" of [c] -> ['0', c]; str -> str
|
||||
>
|
||||
> piToList :: ParserInput -> [Word8]
|
||||
> piToList = BS.unpack . fromParserInput
|
||||
>
|
||||
> listToPI :: [Word8] -> ParserInput
|
||||
> listToPI = ParserInput . BS.pack
|
||||
|
||||
like so:
|
||||
|
||||
```
|
||||
ghci> ParserInput "\0\0\0"
|
||||
00 00 00
|
||||
ghci> ParserInput "Hello"
|
||||
48 65 6c 6c 6f
|
||||
```
|
||||
|
||||
in many cases, we don't really care about the structure of the input, so it can
|
||||
just be a sequence of any bytes. quickcheck uses the `Arbitrary` class for
|
||||
generating test input. the optional `shrink` function produces smaller but
|
||||
related values, for trying to find a minimal test case. the instances for
|
||||
`CoArbitrary` and `Function` is for generating functions which take
|
||||
`ParserInput` as an argument.
|
||||
|
||||
> instance Arbitrary ParserInput where
|
||||
> arbitrary :: Gen ParserInput
|
||||
> arbitrary = listToPI <$> arbitrary
|
||||
>
|
||||
> shrink :: ParserInput -> [ParserInput]
|
||||
> shrink = map listToPI . shrinkList (const []) . piToList
|
||||
>
|
||||
> instance CoArbitrary ParserInput where
|
||||
> coarbitrary = coarbitrary . piToList
|
||||
>
|
||||
> instance Function ParserInput where
|
||||
> function = functionMap piToList listToPI
|
||||
|
||||
in this case, `shrink` tries to remove some bytes from the string, without
|
||||
touching individual bytes.
|
||||
|
||||
<!--
|
||||
#endif
|
||||
-->
|
||||
</details>
|
||||
|
||||
when consuming a given number of characters, we check here that there is
|
||||
actually enough input left.
|
||||
|
||||
> bytes :: Int -> Parser ByteString
|
||||
> bytes len = Parser do
|
||||
> (this, rest) <- BS.splitAt len <$> get
|
||||
> if BS.length this == len then do
|
||||
> put rest
|
||||
> pure this
|
||||
> else
|
||||
> throwError "tried to read past end of input"
|
||||
|
||||
<details>
|
||||
<summary>testing `bytes`</summary>
|
||||
<!--
|
||||
#ifdef TESTS
|
||||
-->
|
||||
|
||||
for these tests, we need a generator for a string together with a valid
|
||||
prefix length. if we tell quickcheck to just guess random numbers with no help,
|
||||
it'll give up before running enough tests.
|
||||
|
||||
> data InputWithPrefix = ParserInput `WithPrefix` Int
|
||||
> deriving (Show, Generic)
|
||||
>
|
||||
> instance Arbitrary InputWithPrefix where
|
||||
> arbitrary = do
|
||||
> input <- arbitrary
|
||||
> len <- chooseInt (0, BS.length (fromParserInput input))
|
||||
> pure (input `WithPrefix` len)
|
||||
> shrink = genericShrink
|
||||
|
||||
|
||||
now, the properties we want are:
|
||||
|
||||
if the remaining input is too short, then `bytes` should fail.
|
||||
|
||||
> prop_bytes_short (ParserInput str) (NonNegative count) =
|
||||
> BS.length str < count ==> isLeft (runParserPartial (bytes count) str)
|
||||
|
||||
if there _is_ enough input, it should succeed.
|
||||
|
||||
> prop_bytes_enough (ParserInput str `WithPrefix` count) =
|
||||
> isRight (runParserPartial (bytes count) str)
|
||||
|
||||
the returned string (the first element of the returned pair) should have the
|
||||
right length.
|
||||
|
||||
> prop_bytes_length (ParserInput str `WithPrefix` count) =
|
||||
> withSuccess (bytes count) str \(result, _) -> BS.length result === count
|
||||
|
||||
appending the returned string and the remainder should give the original input.
|
||||
|
||||
> prop_bytes_append (ParserInput str `WithPrefix` count) =
|
||||
> withSuccess (bytes count) str \(result, rest) -> result <> rest === str
|
||||
|
||||
<!--
|
||||
#endif
|
||||
-->
|
||||
</details>
|
||||
|
||||
it is also useful to take input while a given condition is true. in this case,
|
||||
the parsing always succeeds but might be empty.
|
||||
|
||||
> bytesWhile :: (Word8 -> Bool) -> Parser ByteString
|
||||
> bytesWhile cond = Parser do
|
||||
> (this, rest) <- BS.span cond <$> get
|
||||
> put rest; pure this
|
||||
|
||||
<details>
|
||||
<summary>testing `bytesWhile`</summary>
|
||||
<!--
|
||||
#ifdef TESTS
|
||||
-->
|
||||
|
||||
> withSuccess :: Testable p =>
|
||||
> Parser a -> ByteString -> ((a, ByteString) -> p) -> Property
|
||||
> withSuccess p str f =
|
||||
> case runParserPartial p str of
|
||||
> Left _ -> property False
|
||||
> Right x -> property (f x)
|
||||
|
||||
`bytesWhile` always succeeds (with a possibly-empty result).
|
||||
|
||||
> prop_bytesWhile_succeed (Fn p) (ParserInput str) =
|
||||
> isRight (runParserPartial (bytesWhile p) str)
|
||||
|
||||
`bytesWhile p` returns a value where `p` holds for each byte. this test also
|
||||
categorises cases by result length, and requires the remainder to be non-empty,
|
||||
to check that we're not accidentally creating only constant functions.
|
||||
|
||||
> prop_bytesWhile_result (Fn p) (ParserInput str) = do
|
||||
> withSuccess (bytesWhile p) str \(result, rest) ->
|
||||
> collect (BS.length result)
|
||||
> (not (BS.null rest) ==> BS.all p result)
|
||||
|
||||
if `bytesWhile` doesn't consume the whole input, then `p` _doesn't_ hold for the
|
||||
first byte of the remainder.
|
||||
|
||||
> prop_bytesWhile_rest (Fn p) (ParserInput str) =
|
||||
> withSuccess (bytesWhile p) str \(_, rest) ->
|
||||
> not (BS.null rest) ==> not (p (BS.head rest))
|
||||
|
||||
<!--
|
||||
#endif
|
||||
-->
|
||||
</details>
|
||||
|
||||
|
||||
checking for an exact string can be written in terms of `bytes`.
|
||||
|
||||
> exact :: ByteString -> Parser ()
|
||||
> exact str = do
|
||||
> prefix <- bytes (BS.length str)
|
||||
> unless (prefix == str) do
|
||||
> Parser (throwError ("expected " ++ show str ++ ", got " ++ show prefix))
|
||||
|
||||
to detect the end of input, it's simplest to just look at the remaining input
|
||||
directly.
|
||||
|
||||
> eof :: Parser ()
|
||||
> eof = Parser do
|
||||
> input <- get
|
||||
> unless (BS.null input) do
|
||||
> throwError ("expected end of input, got " ++ show input)
|
||||
|
||||
reading an integer just takes the appropriate number of bytes and shifts them
|
||||
together. **multi-byte integers in the IPS format are big-endian.**
|
||||
|
||||
> word8 :: Parser Word8
|
||||
> word8 = BS.head <$> bytes 1
|
||||
>
|
||||
> word16BE :: Parser Word16
|
||||
> word16BE = do
|
||||
> hi <- word8; lo <- word8
|
||||
> pure $ fromIntegral hi `shiftL` 8 .|. fromIntegral lo
|
||||
>
|
||||
> word24BE :: Parser Word32
|
||||
> word24BE = do
|
||||
> hi <- word16BE; lo <- word8
|
||||
> pure $ fromIntegral hi `shiftL` 8 .|. fromIntegral lo
|
||||
|
||||
|
||||
== IPS file format
|
||||
|
||||
the IPS format is extremely simple. it consists of the ASCII string `PATCH`, a
|
||||
number of [chunks](#chunks), and finally the string `EOF`.
|
||||
|
||||
```{=html}
|
||||
<figure class=shadowed>
|
||||
<svg viewBox="-1 -1 602 32" width=602 height=32>
|
||||
<style>
|
||||
rect { stroke-width: 1px; stroke: currentcolor; }
|
||||
text { text-anchor: middle; }
|
||||
</style>
|
||||
<g>
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 220deg)" />
|
||||
<text x=7 y=22> P </text>
|
||||
</g>
|
||||
<g transform="translate(15 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 220deg)" />
|
||||
<text x=7 y=22> A </text>
|
||||
</g>
|
||||
<g transform="translate(30 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 220deg)" />
|
||||
<text x=7 y=22> T </text>
|
||||
</g>
|
||||
<g transform="translate(45 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 220deg)" />
|
||||
<text x=7 y=22> C </text>
|
||||
</g>
|
||||
<g transform="translate(60 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 220deg)" />
|
||||
<text x=7 y=22> H </text>
|
||||
</g>
|
||||
<g transform="translate(75 0)">
|
||||
<rect width=80 height=30 fill="oklch(95% 15% 180deg)" />
|
||||
<text x=40 y=22> Chunk1 </text>
|
||||
</g>
|
||||
<g transform="translate(155 0)">
|
||||
<rect width=180 height=30 fill="oklch(95% 15% 160deg)" />
|
||||
<text x=90 y=22> Chunk2 </text>
|
||||
</g>
|
||||
<g transform="translate(335 0)">
|
||||
<rect width=120 height=30 fill="oklch(95% 15% 140deg)" />
|
||||
<text x=60 y=22> Chunk3 </text>
|
||||
</g>
|
||||
<g transform="translate(455 0)">
|
||||
<rect width=100 height=30 fill="oklch(95% 15% 120deg)" />
|
||||
<text x=50 y=22> … </text>
|
||||
</g>
|
||||
<g transform="translate(555 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 100deg)" />
|
||||
<text x=7 y=22> E </text>
|
||||
</g>
|
||||
<g transform="translate(570 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 100deg)" />
|
||||
<text x=7 y=22> O </text>
|
||||
</g>
|
||||
<g transform="translate(585 0)">
|
||||
<rect width=15 height=30 fill="oklch(95% 15% 100deg)" />
|
||||
<text x=7 y=22> F </text>
|
||||
</g>
|
||||
</svg>
|
||||
</figure>
|
||||
```
|
||||
|
||||
> type IPS = [IpsChunk]
|
||||
>
|
||||
> ipsFile :: Parser IPS
|
||||
> ipsFile = do exact "PATCH"; ipsChunks
|
||||
|
||||
if the rest of the input is just the `EOF` marker (followed by the actual end),
|
||||
then we're done. otherwise, parse a chunk and repeat.
|
||||
|
||||
explicitly checking for the end of input as well means that chunks with an
|
||||
offset of `0x454F46`, which will _look_ like `EOF`, are still handled
|
||||
correctly.
|
||||
|
||||
> ipsChunks :: Parser [IpsChunk]
|
||||
> ipsChunks = stop <|> go where
|
||||
> stop = do exact "EOF"
|
||||
> eof
|
||||
> pure []
|
||||
> go = do next <- ipsChunk
|
||||
> rest <- ipsChunks
|
||||
> pure (next : rest)
|
||||
|
||||
=== chunks
|
||||
|
||||
a chunk can either be a [run-length-encoded](#rle) or a [literal](#lit) chunk.
|
||||
in either case, they are preceded by a 24-bit offset giving their start
|
||||
position.
|
||||
|
||||
> data IpsChunk = IpsChunk { offset :: Word32, body :: IpsChunkBody }
|
||||
>
|
||||
> data IpsChunkBody
|
||||
> = Lit ByteString
|
||||
> | RLE { size :: Word16, value :: Word8 }
|
||||
>
|
||||
> ipsChunk :: Parser IpsChunk
|
||||
> ipsChunk = IpsChunk <$> word24BE <*> (rle <|> lit)
|
||||
|
||||
a [**run-length encoded chunk**]{#rle} indicates a single byte being repeated a
|
||||
given number of times. it is represented by two zero bytes, followed by a
|
||||
two-byte length, and a single byte value. the zero bytes exist to distinguish it
|
||||
from a literal chunk, which cannot have a zero length.
|
||||
|
||||
```{=html}
|
||||
<figure class=shadowed>
|
||||
<svg viewBox="-1 -1 642 32" width=642 height=32>
|
||||
<style>
|
||||
rect { stroke-width: 1px; stroke: currentcolor; }
|
||||
text { text-anchor: middle }
|
||||
</style>
|
||||
<g>
|
||||
<rect width=240 height=30 fill="oklch(95% 15% 80deg)" />
|
||||
<text x=120 y=22> off (3) </text>
|
||||
</g>
|
||||
<g transform="translate(240 0)">
|
||||
<rect width=160 height=30 fill="oklch(95% 15% 60deg)" />
|
||||
<text x=80 y=22> "00" (2) </text>
|
||||
</g>
|
||||
<g transform="translate(400 0)">
|
||||
<rect width=160 height=30 fill="oklch(95% 15% 40deg)" />
|
||||
<text x=80 y=22> size (2) </text>
|
||||
</g>
|
||||
<g transform="translate(560 0)">
|
||||
<rect width=80 height=30 fill="oklch(95% 15% 20deg)" />
|
||||
<text x=40 y=22> val (1) </text>
|
||||
</g>
|
||||
</svg>
|
||||
</figure>
|
||||
```
|
||||
|
||||
> rle :: Parser IpsChunkBody
|
||||
> rle = do
|
||||
> exact [0,0]
|
||||
> size <- word16BE
|
||||
> value <- word8
|
||||
> pure (RLE { size, value })
|
||||
|
||||
a [**literal chunk**]{#lit} consists of a 16-bit (non-zero) length, followed by
|
||||
the data it contains. actually checking the size is non-zero isn't needed,
|
||||
because `rle` is attempted first and will succeed in that case.
|
||||
|
||||
```{=html}
|
||||
<figure class=shadowed>
|
||||
<svg viewBox="-1 -1 642 32" width=642 height=32>
|
||||
<style>
|
||||
rect, path { stroke-width: 1px; stroke: currentcolor; }
|
||||
text { text-anchor: middle; }
|
||||
</style>
|
||||
<g>
|
||||
<rect width=240 height=30 fill="oklch(95% 15% 80deg)" />
|
||||
<text x=120 y=22> off (3) </text>
|
||||
</g>
|
||||
<g transform="translate(240 0)">
|
||||
<rect width=160 height=30 fill="oklch(95% 15% 60deg)" />
|
||||
<text x=80 y=22> size (2) </text>
|
||||
</g>
|
||||
<g transform="translate(400 0)">
|
||||
<path fill="oklch(95% 15% 40deg)"
|
||||
d="M 240,0 h -240 v 30 h 240
|
||||
l -5,-5 5,-5 -5,-5 5,-5 -5,-5 z" />
|
||||
<text x=120 y=22> data (<tspan font-style=italic>size</tspan>) … </text>
|
||||
</g>
|
||||
</svg>
|
||||
</figure>
|
||||
```
|
||||
|
||||
> lit :: Parser IpsChunkBody
|
||||
> lit = do
|
||||
> size <- word16BE
|
||||
> Lit <$> bytes (fromIntegral size)
|
||||
|
||||
|
||||
== BPS file format
|
||||
|
||||
BPS uses arbitrary-size numbers using a variable-length, **little-endian**
|
||||
encoding.
|
||||
|
||||
Each byte contains seven bits of data, using the most significant bit to
|
||||
indicate whether the number continues. One extra quirk is that each non-final
|
||||
byte is also decremented. the rationale for this is that otherwise, it would be
|
||||
possible to encode `1` as `0x81` but also as `0x01_80`, with the meaning
|
||||
`000 0000 000 0001` with extra zero padding. with this extra step, the second
|
||||
encoding actually represents `000 0001 000 0001`, or 129.
|
||||
|
||||
so we read the input until reaching a byte with the MSB set, plus one more, then
|
||||
shift them all together, right to left.
|
||||
|
||||
|
||||
> bpsNumber :: Parser Natural
|
||||
> bpsNumber = do
|
||||
> rest <- bytesWhile \b -> not (testBit b 7)
|
||||
> end <- word8
|
||||
> pure (BS.foldr' combine (fromIntegral (clearBit end 7)) rest)
|
||||
> where
|
||||
> combine byte acc = fromIntegral byte .|. (acc + 1) `shiftL` 7
|
||||
|
||||
|
||||
<details>
|
||||
<summary>
|
||||
testing `bpsNumber`
|
||||
</summary>
|
||||
<!--
|
||||
#ifdef TESTS
|
||||
-->
|
||||
|
||||
this function is _almost_ a nice fold, but i'm not yet 100% certain the `(+ 1)`
|
||||
is in the right place. so here's a more literal translation of the C code:
|
||||
|
||||
> bpsNumberSpec :: Parser Natural
|
||||
> bpsNumberSpec = loop 0 1 where
|
||||
> loop acc shft = do
|
||||
> byte <- word8
|
||||
> let acc' = acc + fromIntegral (clearBit byte 7) * shft
|
||||
> let shft' = shft `shiftL` 7
|
||||
> if testBit byte 7 then
|
||||
> pure acc'
|
||||
> else
|
||||
> loop (acc' + shft') shft'
|
||||
|
||||
a valid encoding of a number is a string of zero or more bytes with the MSB
|
||||
unset, followed by one with it set.
|
||||
|
||||
> newtype BpsNumber = BpsNumber ByteString
|
||||
> deriving Show via ParserInput
|
||||
>
|
||||
> instance Arbitrary BpsNumber where
|
||||
> arbitrary = do
|
||||
> rest :: [Word8] <- arbitrary
|
||||
> final :: Word8 <- arbitrary
|
||||
> let list = map (\b -> clearBit b 7) rest ++ [setBit final 7]
|
||||
> pure (BpsNumber (BS.pack list))
|
||||
|
||||
both functions should accept any string of this format, and should each give the
|
||||
same result.
|
||||
|
||||
> prop_bpsOk (BpsNumber e) =
|
||||
> let bps' = runParser bpsNumberSpec e
|
||||
> bps = runParser bpsNumber e in
|
||||
> isRight bps' .&&. isRight bps .&&. bps' === bps
|
||||
|
||||
<!--
|
||||
#endif
|
||||
-->
|
||||
</details>
|
||||
|
||||
|
||||
> props :: [(String, Property)]
|
||||
> props =
|
||||
> [("prop_bytes_short", property prop_bytes_short),
|
||||
> ("prop_bytes_enough", property prop_bytes_enough),
|
||||
> ("prop_bytes_length", property prop_bytes_length),
|
||||
> ("prop_bytes_append", property prop_bytes_append),
|
||||
> ("prop_bytesWhile_succeed", property prop_bytesWhile_succeed),
|
||||
> ("prop_bytesWhile_result", property prop_bytesWhile_result),
|
||||
> ("prop_bytesWhile_rest", property prop_bytesWhile_rest),
|
||||
> ("prop_bpsOk", property prop_bpsOk)]
|
||||
>
|
||||
> testAll :: IO ()
|
||||
> testAll = forM_ props \(name, prop) -> do
|
||||
> putStr (name ++ ": ")
|
||||
> quickCheck prop
|
||||
|
||||
|
||||
== applying a patch
|
||||
|
||||
|
||||
> _outSize :: [IpsChunk] -> ByteString -> Int
|
||||
> _outSize patch orig =
|
||||
> foldl' (\s c -> max s (end c)) (BS.length orig) patch
|
||||
> where
|
||||
> end (IpsChunk offset body) = fromIntegral offset + size body
|
||||
> size (Lit str) = BS.length str
|
||||
> size (RLE sz _) = fromIntegral sz
|
||||
|
||||
|
||||
|
||||
|
||||
> main :: IO ()
|
||||
> main = testAll
|
Loading…
Add table
Add a link
Reference in a new issue