dash/contrib/message-capture/message-capture-parser.py
MacroFake 6731b10288
Merge bitcoin/bitcoin#26007: [contrib] message-capture-parser: fix AssertionError on parsing headers message
644772b9efffda4dac01aff54042b3162079514d message-capture-parser: fix AssertionError on parsing `headers` message (Sebastian Falbesoner)

Pull request description:

  If a test framework message's field name is in the list of `HASH_INT_VECTORS`, we currently assume that it _always_ has to contain a vector of integers and throw otherwise:
  0ebd4db32b/contrib/message-capture/message-capture-parser.py (L82-L83)
  (introduced in PR #25367, commit 42bbbba7c83d1e2baad18b4c6f05bad1358eb117).

  However, that assumption is too strict. The (de)serialization field name "headers" is used in two different message types, one for `cfcheckpt` (where it is serialized as an integer vector), and another time for `headers` (where it is serialized as a vector of `CBlockHeader`s). Parsing the latter fails as it is not an integer vector and thus triggers the assert.

  Fix this by adding the integer type check as additional condition to the `HASH_INT_VECTORS` check rather than asserting.
  Fixes #25954.

ACKs for top commit:
  glozow:
    ACK 644772b9efffda4dac01aff54042b3162079514d

Tree-SHA512: c98a107f6703c6c1a81771907c25bcc171c631b57fd605fbebaedd93d651e2ef02fb5601853a9bc7d659ab531c5f47770181173a36ea2b37f584aa7a37b66505
2024-02-29 12:33:46 -06:00

215 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright (c) 2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Parse message capture binary files. To be used in conjunction with -capturemessages."""
import argparse
import os
import shutil
import sys
from io import BytesIO
import json
from pathlib import Path
from typing import Any, List, Optional
sys.path.append(os.path.join(os.path.dirname(__file__), '../../test/functional'))
from test_framework.messages import ser_uint256 # noqa: E402
from test_framework.p2p import MESSAGEMAP # noqa: E402
TIME_SIZE = 8
LENGTH_SIZE = 4
MSGTYPE_SIZE = 12
# The test framework classes stores hashes as large ints in many cases.
# These are variables of type uint256 in core.
# There isn't a way to distinguish between a large int and a large int that is actually a blob of bytes.
# As such, they are itemized here.
# Any variables with these names that are of type int are actually uint256 variables.
# (These can be easily found by looking for calls to deser_uint256, deser_uint256_vector, and uint256_from_str in messages.py)
HASH_INTS = [
"blockhash",
"block_hash",
"hash",
"hashMerkleRoot",
"hashPrevBlock",
"hashstop",
"prev_header",
"sha256",
"stop_hash",
]
HASH_INT_VECTORS = [
"hashes",
"headers",
"vHave",
"vHash",
]
class ProgressBar:
def __init__(self, total: float):
self.total = total
self.running = 0
def set_progress(self, progress: float):
cols = shutil.get_terminal_size()[0]
if cols <= 12:
return
max_blocks = cols - 9
num_blocks = int(max_blocks * progress)
print('\r[ {}{} ] {:3.0f}%'
.format('#' * num_blocks,
' ' * (max_blocks - num_blocks),
progress * 100),
end ='')
def update(self, more: float):
self.running += more
self.set_progress(self.running / self.total)
def to_jsonable(obj: Any) -> Any:
if hasattr(obj, "__dict__"):
return obj.__dict__
elif hasattr(obj, "__slots__"):
ret = {} # type: Any
for slot in obj.__slots__:
val = getattr(obj, slot, None)
if slot in HASH_INTS and isinstance(val, int):
ret[slot] = ser_uint256(val).hex()
elif slot in HASH_INT_VECTORS and all(isinstance(a, int) for a in val):
ret[slot] = [ser_uint256(a).hex() for a in val]
else:
ret[slot] = to_jsonable(val)
return ret
elif isinstance(obj, list):
return [to_jsonable(a) for a in obj]
elif isinstance(obj, bytes):
return obj.hex()
else:
return obj
def process_file(path: str, messages: List[Any], recv: bool, progress_bar: Optional[ProgressBar]) -> None:
with open(path, 'rb') as f_in:
if progress_bar:
bytes_read = 0
while True:
if progress_bar:
# Update progress bar
diff = f_in.tell() - bytes_read - 1
progress_bar.update(diff)
bytes_read = f_in.tell() - 1
# Read the Header
tmp_header_raw = f_in.read(TIME_SIZE + LENGTH_SIZE + MSGTYPE_SIZE)
if not tmp_header_raw:
break
tmp_header = BytesIO(tmp_header_raw)
time = int.from_bytes(tmp_header.read(TIME_SIZE), "little") # type: int
msgtype = tmp_header.read(MSGTYPE_SIZE).split(b'\x00', 1)[0] # type: bytes
length = int.from_bytes(tmp_header.read(LENGTH_SIZE), "little") # type: int
# Start converting the message to a dictionary
msg_dict = {}
msg_dict["direction"] = "recv" if recv else "sent"
msg_dict["time"] = time
msg_dict["size"] = length # "size" is less readable here, but more readable in the output
msg_ser = BytesIO(f_in.read(length))
# Determine message type
if msgtype not in MESSAGEMAP:
# Unrecognized message type
try:
msgtype_tmp = msgtype.decode()
if not msgtype_tmp.isprintable():
raise UnicodeDecodeError
msg_dict["msgtype"] = msgtype_tmp
except UnicodeDecodeError:
msg_dict["msgtype"] = "UNREADABLE"
msg_dict["body"] = msg_ser.read().hex()
msg_dict["error"] = "Unrecognized message type."
messages.append(msg_dict)
print(f"WARNING - Unrecognized message type {msgtype} in {path}", file=sys.stderr)
continue
# Deserialize the message
msg = MESSAGEMAP[msgtype]()
msg_dict["msgtype"] = msgtype.decode()
try:
msg.deserialize(msg_ser)
except KeyboardInterrupt:
raise
except Exception:
# Unable to deserialize message body
msg_ser.seek(0, os.SEEK_SET)
msg_dict["body"] = msg_ser.read().hex()
msg_dict["error"] = "Unable to deserialize message."
messages.append(msg_dict)
print(f"WARNING - Unable to deserialize message in {path}", file=sys.stderr)
continue
# Convert body of message into a jsonable object
if length:
msg_dict["body"] = to_jsonable(msg)
messages.append(msg_dict)
if progress_bar:
# Update the progress bar to the end of the current file
# in case we exited the loop early
f_in.seek(0, os.SEEK_END) # Go to end of file
diff = f_in.tell() - bytes_read - 1
progress_bar.update(diff)
def main():
parser = argparse.ArgumentParser(
description=__doc__,
epilog="EXAMPLE \n\t{0} -o out.json <data-dir>/message_capture/**/*.dat".format(sys.argv[0]),
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"capturepaths",
nargs='+',
help="binary message capture files to parse.")
parser.add_argument(
"-o", "--output",
help="output file. If unset print to stdout")
parser.add_argument(
"-n", "--no-progress-bar",
action='store_true',
help="disable the progress bar. Automatically set if the output is not a terminal")
args = parser.parse_args()
capturepaths = [Path.cwd() / Path(capturepath) for capturepath in args.capturepaths]
output = Path.cwd() / Path(args.output) if args.output else False
use_progress_bar = (not args.no_progress_bar) and sys.stdout.isatty()
messages = [] # type: List[Any]
if use_progress_bar:
total_size = sum(capture.stat().st_size for capture in capturepaths)
progress_bar = ProgressBar(total_size)
else:
progress_bar = None
for capture in capturepaths:
process_file(str(capture), messages, "recv" in capture.stem, progress_bar)
messages.sort(key=lambda msg: msg['time'])
if use_progress_bar:
progress_bar.set_progress(1)
jsonrep = json.dumps(messages)
if output:
with open(str(output), 'w+', encoding="utf8") as f_out:
f_out.write(jsonrep)
else:
print(jsonrep)
if __name__ == "__main__":
main()