dash/contrib/message-capture/message-capture-parser.py
2bde1ddca4 Merge #5927: fix: check if message can be handled before attempting to deserialize (pasta)
afbae06520 fix: check if message can be handled before attempting to deserialize (thephez)

Pull request description:

  ## Issue being fixed or feature implemented
  Currently `message-capture-parser.py` crashes when encountering certain messages (e.g. `mnauth`). This change at least makes it possible to run the script without crashing; there may be better options for solving this.

  ## What was done?
  Check whether the `MESSAGEMAP` lookup is going to return `None` before attempting to do anything further with the message. Hide whitespace changes to see the few lines that were added: https://github.com/dashpay/dash/pull/5927/files?diff=unified&w=1
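
  In short, a `MESSAGEMAP` entry of `None` is now treated the same way as a message type that is missing from the map entirely. A minimal sketch of the guard, mirroring the check in the script below:

  ```python
  # Skip deserialization when the message type is unknown (not in MESSAGEMAP)
  # or known but unhandled (mapped to None); keep the raw body as hex instead.
  if msgtype not in MESSAGEMAP or MESSAGEMAP[msgtype] is None:
      err_str = "Unrecognized" if msgtype not in MESSAGEMAP else "Unhandled"
      msg_dict["body"] = msg_ser.read().hex()
      msg_dict["error"] = f"{err_str} message type"
  ```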

  ## How Has This Been Tested?
  Ran the script locally.

  ## Breaking Changes
  N/A

  ## Checklist:
    _Go over all the following points, and put an `x` in all the boxes that apply._
  - [x] I have performed a self-review of my own code
  - [ ] I have commented my code, particularly in hard-to-understand areas
  - [ ] I have added or updated relevant unit/integration/functional/e2e tests
  - [ ] I have made corresponding changes to the documentation
  - [ ] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_

Top commit has no ACKs.

Tree-SHA512: 041af57afcfd1d93487fd41d34a50e3a99f7fa129563dfe1e1cf2498974c8e658bd6acb9c810887c841160074056ba999e9b6607ac9336b98b9d42806682c607
2024-04-03 11:21:15 -05:00


#!/usr/bin/env python3
# Copyright (c) 2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Parse message capture binary files. To be used in conjunction with -capturemessages."""
import argparse
import os
import shutil
import sys
from io import BytesIO
import json
from pathlib import Path
from typing import Any, List, Optional
sys.path.append(os.path.join(os.path.dirname(__file__), '../../test/functional'))
from test_framework.messages import ser_uint256 # noqa: E402
from test_framework.p2p import MESSAGEMAP # noqa: E402
TIME_SIZE = 8
LENGTH_SIZE = 4
MSGTYPE_SIZE = 12
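
# Each record in a capture file is laid out as (see process_file below):
#   time    (TIME_SIZE bytes)    little-endian timestamp, microseconds since epoch
#   msgtype (MSGTYPE_SIZE bytes) command name, padded with null bytes
#   length  (LENGTH_SIZE bytes)  little-endian size of the payload
#   payload (length bytes)       raw serialized message body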
# The test framework classes store hashes as large ints in many cases.
# These are variables of type uint256 in core.
# There isn't a way to distinguish between a large int and a large int that is actually a blob of bytes.
# As such, they are itemized here.
# Any variables with these names that are of type int are actually uint256 variables.
# (These can be easily found by looking for calls to deser_uint256, deser_uint256_vector, and uint256_from_str in messages.py)
HASH_INTS = [
    "blockhash",
    "block_hash",
    "hash",
    "hashMerkleRoot",
    "hashPrevBlock",
    "hashstop",
    "prev_header",
    "sha256",
    "stop_hash",
]

HASH_INT_VECTORS = [
    "hashes",
    "headers",
    "vHave",
    "vHash",
]
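
# Example: to_jsonable() below renders a "blockhash" slot holding an int as
# ser_uint256(value).hex() (the 32-byte little-endian serialization, hex-encoded)
# rather than as a bare decimal integer.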

class ProgressBar:
    def __init__(self, total: float):
        self.total = total
        self.running = 0

    def set_progress(self, progress: float):
        cols = shutil.get_terminal_size()[0]
        if cols <= 12:
            return
        max_blocks = cols - 9
        num_blocks = int(max_blocks * progress)
        print('\r[ {}{} ] {:3.0f}%'
              .format('#' * num_blocks,
                      ' ' * (max_blocks - num_blocks),
                      progress * 100),
              end='')

    def update(self, more: float):
        self.running += more
        self.set_progress(self.running / self.total)

def to_jsonable(obj: Any) -> Any:
    if hasattr(obj, "__dict__"):
        return obj.__dict__
    elif hasattr(obj, "__slots__"):
        ret = {}  # type: Any
        for slot in obj.__slots__:
            val = getattr(obj, slot, None)
            if slot in HASH_INTS and isinstance(val, int):
                ret[slot] = ser_uint256(val).hex()
            elif slot in HASH_INT_VECTORS and all(isinstance(a, int) for a in val):
                ret[slot] = [ser_uint256(a).hex() for a in val]
            else:
                ret[slot] = to_jsonable(val)
        return ret
    elif isinstance(obj, list):
        return [to_jsonable(a) for a in obj]
    elif isinstance(obj, bytes):
        return obj.hex()
    else:
        return obj

def process_file(path: str, messages: List[Any], recv: bool, progress_bar: Optional[ProgressBar]) -> None:
    with open(path, 'rb') as f_in:
        if progress_bar:
            bytes_read = 0

        while True:
            if progress_bar:
                # Update progress bar
                diff = f_in.tell() - bytes_read - 1
                progress_bar.update(diff)
                bytes_read = f_in.tell() - 1

            # Read the Header
            tmp_header_raw = f_in.read(TIME_SIZE + LENGTH_SIZE + MSGTYPE_SIZE)
            if not tmp_header_raw:
                break
            tmp_header = BytesIO(tmp_header_raw)
            time = int.from_bytes(tmp_header.read(TIME_SIZE), "little")  # type: int
            msgtype = tmp_header.read(MSGTYPE_SIZE).split(b'\x00', 1)[0]  # type: bytes
            length = int.from_bytes(tmp_header.read(LENGTH_SIZE), "little")  # type: int

            # Start converting the message to a dictionary
            msg_dict = {}
            msg_dict["direction"] = "recv" if recv else "sent"
            msg_dict["time"] = time
            msg_dict["size"] = length  # "size" is less readable here, but more readable in the output

            msg_ser = BytesIO(f_in.read(length))

            # Determine message type
            if msgtype not in MESSAGEMAP or MESSAGEMAP[msgtype] is None:
                # Unrecognized or unhandled message type
                try:
                    msgtype_tmp = msgtype.decode()
                    if not msgtype_tmp.isprintable():
                        raise UnicodeDecodeError("ascii", msgtype, 0, len(msgtype), "message type is not printable")
msg_dict["msgtype"] = msgtype_tmp
except UnicodeDecodeError:
msg_dict["msgtype"] = "UNREADABLE"
err_str = "Unrecognized" if msgtype not in MESSAGEMAP else "Unhandled"
msg_dict["body"] = msg_ser.read().hex()
msg_dict["error"] = f"{err_str} message type"
messages.append(msg_dict)
print(f"WARNING - {msg_dict['error']} {msgtype} in {path}", file=sys.stderr)
continue
# Deserialize the message
msg = MESSAGEMAP[msgtype]()
msg_dict["msgtype"] = msgtype.decode()
try:
msg.deserialize(msg_ser)
except KeyboardInterrupt:
raise
except Exception:
# Unable to deserialize message body
msg_ser.seek(0, os.SEEK_SET)
msg_dict["body"] = msg_ser.read().hex()
msg_dict["error"] = "Unable to deserialize message."
messages.append(msg_dict)
print(f"WARNING - Unable to deserialize message in {path}", file=sys.stderr)
continue
# Convert body of message into a jsonable object
if length:
msg_dict["body"] = to_jsonable(msg)
messages.append(msg_dict)
if progress_bar:
# Update the progress bar to the end of the current file
# in case we exited the loop early
f_in.seek(0, os.SEEK_END) # Go to end of file
diff = f_in.tell() - bytes_read - 1
progress_bar.update(diff)

def main():
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog="EXAMPLE \n\t{0} -o out.json <data-dir>/message_capture/**/*.dat".format(sys.argv[0]),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        "capturepaths",
        nargs='+',
        help="binary message capture files to parse.")
    parser.add_argument(
        "-o", "--output",
        help="output file. If unset print to stdout")
    parser.add_argument(
        "-n", "--no-progress-bar",
        action='store_true',
        help="disable the progress bar. Automatically set if the output is not a terminal")
    args = parser.parse_args()
    capturepaths = [Path.cwd() / Path(capturepath) for capturepath in args.capturepaths]
    output = Path.cwd() / Path(args.output) if args.output else False
    use_progress_bar = (not args.no_progress_bar) and sys.stdout.isatty()

    messages = []  # type: List[Any]
    if use_progress_bar:
        total_size = sum(capture.stat().st_size for capture in capturepaths)
        progress_bar = ProgressBar(total_size)
    else:
        progress_bar = None

    for capture in capturepaths:
        process_file(str(capture), messages, "recv" in capture.stem, progress_bar)

    messages.sort(key=lambda msg: msg['time'])

    if use_progress_bar:
        progress_bar.set_progress(1)

    jsonrep = json.dumps(messages)
    if output:
        with open(str(output), 'w+', encoding="utf8") as f_out:
            f_out.write(jsonrep)
    else:
        print(jsonrep)


if __name__ == "__main__":
    main()