diff --git a/doc/developer-notes.md b/doc/developer-notes.md index f9b5380c7c..d2b50b4a92 100644 --- a/doc/developer-notes.md +++ b/doc/developer-notes.md @@ -17,6 +17,7 @@ Developer Notes - [DEBUG_LOCKORDER](#debug_lockorder) - [Valgrind suppressions file](#valgrind-suppressions-file) - [Compiling for test coverage](#compiling-for-test-coverage) + - [Performance profiling with perf](#performance-profiling-with-perf) - [Locking/mutex usage notes](#lockingmutex-usage-notes) - [Threads](#threads) - [Ignoring IDE/editor files](#ignoring-ideeditor-files) @@ -271,6 +272,51 @@ make cov # A coverage report will now be accessible at `./test_dash.coverage/index.html`. ``` +### Performance profiling with perf + +Profiling is a good way to get a precise idea of where time is being spent in +code. One tool for doing profiling on Linux platforms is called +[`perf`](http://www.brendangregg.com/perf.html), and has been integrated into +the functional test framework. Perf can observe a running process and sample +(at some frequency) where its execution is. + +Perf installation is contingent on which kernel version you're running; see +[this StackExchange +thread](https://askubuntu.com/questions/50145/how-to-install-perf-monitoring-tool) +for specific instructions. + +Certain kernel parameters may need to be set for perf to be able to inspect the +running process' stack. + +```sh +$ sudo sysctl -w kernel.perf_event_paranoid=-1 +$ sudo sysctl -w kernel.kptr_restrict=0 +``` + +Make sure you [understand the security +trade-offs](https://lwn.net/Articles/420403/) of setting these kernel +parameters. + +To profile a running dashd process for 60 seconds, you could use an +invocation of `perf record` like this: + +```sh +$ perf record \ + -g --call-graph dwarf --per-thread -F 140 \ + -p `pgrep dashd` -- sleep 60 +``` + +You could then analyze the results by running + +```sh +perf report --stdio | c++filt | less +``` + +or using a graphical tool like [Hotspot](https://github.com/KDAB/hotspot). + +See the functional test documentation for how to invoke perf within tests. + + **Sanitizers** Dash Core can be compiled with various "sanitizers" enabled, which add diff --git a/test/README.md b/test/README.md index 123247d4d0..47cd8e0f9c 100644 --- a/test/README.md +++ b/test/README.md @@ -206,6 +206,26 @@ gdb /home/example/dashd Note: gdb attach step may require ptrace_scope to be modified, or `sudo` preceding the `gdb`. See this link for considerations: https://www.kernel.org/doc/Documentation/security/Yama.txt +##### Profiling + +An easy way to profile node performance during functional tests is provided +for Linux platforms using `perf`. + +Perf will sample the running node and will generate profile data in the node's +datadir. The profile data can then be presented using `perf report` or a graphical +tool like [hotspot](https://github.com/KDAB/hotspot). + +To generate a profile during test suite runs, use the `--perf` flag. + +To see render the output to text, run + +```sh +perf report -i /path/to/datadir/send-big-msgs.perf.data.xxxx --stdio | c++filt | less +``` + +For ways to generate more granular profiles, see the README in +[test/functional](/test/functional). + ### Util tests Util tests can be run locally by running `test/util/bitcoin-util-test.py`. diff --git a/test/functional/README.md b/test/functional/README.md index 218439ab2c..9b306b410a 100644 --- a/test/functional/README.md +++ b/test/functional/README.md @@ -122,3 +122,36 @@ Helpers for script.py #### [test_framework/blocktools.py](test_framework/blocktools.py) Helper functions for creating blocks and transactions. + +### Benchmarking with perf + +An easy way to profile node performance during functional tests is provided +for Linux platforms using `perf`. + +Perf will sample the running node and will generate profile data in the node's +datadir. The profile data can then be presented using `perf report` or a graphical +tool like [hotspot](https://github.com/KDAB/hotspot). + +There are two ways of invoking perf: one is to use the `--perf` flag when +running tests, which will profile each node during the entire test run: perf +begins to profile when the node starts and ends when it shuts down. The other +way is the use the `profile_with_perf` context manager, e.g. + +```python +with node.profile_with_perf("send-big-msgs"): + # Perform activity on the node you're interested in profiling, e.g.: + for _ in range(10000): + node.p2p.send_message(some_large_message) +``` + +To see useful textual output, run + +```sh +perf report -i /path/to/datadir/send-big-msgs.perf.data.xxxx --stdio | c++filt | less +``` + +#### See also: + +- [Installing perf](https://askubuntu.com/q/50145) +- [Perf examples](http://www.brendangregg.com/perf.html) +- [Hotspot](https://github.com/KDAB/hotspot): a GUI for perf output analysis diff --git a/test/functional/test_framework/test_framework.py b/test/functional/test_framework/test_framework.py index e19d344171..840ce004ab 100755 --- a/test/functional/test_framework/test_framework.py +++ b/test/functional/test_framework/test_framework.py @@ -145,6 +145,8 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass): help="Pass extra args to all dashd instances") parser.add_argument("--timeoutscale", dest="timeout_scale", default=1, type=int, help="Scale the test timeouts by multiplying them with the here provided value (default: %(default)s)") + parser.add_argument("--perf", dest="perf", default=False, action="store_true", + help="profile running nodes with perf for the duration of the test") self.add_options(parser) self.options = parser.parse_args() @@ -227,11 +229,20 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass): node.cleanup_on_exit = False self.log.info("Note: dashds were not stopped and may still be running") - if not self.options.nocleanup and not self.options.noshutdown and success != TestStatus.FAILED: + should_clean_up = ( + not self.options.nocleanup and + not self.options.noshutdown and + success != TestStatus.FAILED and + not self.options.perf + ) + if should_clean_up: self.log.info("Cleaning up {} on exit".format(self.options.tmpdir)) cleanup_tree_on_exit = True + elif self.options.perf: + self.log.warning("Not cleaning up dir {} due to perf data".format(self.options.tmpdir)) + cleanup_tree_on_exit = False else: - self.log.warning("Not cleaning up dir %s" % self.options.tmpdir) + self.log.warning("Not cleaning up dir {}".format(self.options.tmpdir)) cleanup_tree_on_exit = False if success == TestStatus.PASSED: @@ -321,7 +332,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass): assert_equal(len(binary), num_nodes) for i in range(num_nodes): numnode = len(self.nodes) - self.nodes.append(TestNode(numnode, get_datadir_path(self.options.tmpdir, numnode), self.extra_args_from_options, chain=self.chain, rpchost=rpchost, timewait=self.rpc_timeout, bitcoind=binary[i], bitcoin_cli=self.options.bitcoincli, mocktime=self.mocktime, coverage_dir=self.options.coveragedir, extra_conf=extra_confs[i], extra_args=extra_args[i], use_cli=self.options.usecli)) + self.nodes.append(TestNode(numnode, get_datadir_path(self.options.tmpdir, numnode), self.extra_args_from_options, chain=self.chain, rpchost=rpchost, timewait=self.rpc_timeout, bitcoind=binary[i], bitcoin_cli=self.options.bitcoincli, mocktime=self.mocktime, coverage_dir=self.options.coveragedir, extra_conf=extra_confs[i], extra_args=extra_args[i], use_cli=self.options.usecli, start_perf=self.options.perf)) def start_node(self, i, *args, **kwargs): """Start a dashd""" diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 5592c20632..c4a577c48c 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -17,6 +17,8 @@ import subprocess import tempfile import time import urllib.parse +import shlex +import sys from .authproxy import JSONRPCException from .util import ( @@ -60,7 +62,13 @@ class TestNode(): To make things easier for the test writer, any unrecognised messages will be dispatched to the RPC connection.""" - def __init__(self, i, datadir, extra_args_from_options, *, chain, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False): + def __init__(self, i, datadir, extra_args_from_options, *, chain, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False, start_perf=False): + """ + Kwargs: + start_perf (bool): If True, begin profiling the node with `perf` as soon as + the node starts. + """ + self.index = i self.datadir = datadir self.chain = chain @@ -93,6 +101,7 @@ class TestNode(): self.cli = TestNodeCLI(bitcoin_cli, self.datadir) self.use_cli = use_cli + self.start_perf = start_perf # Don't try auto backups (they fail a lot when running tests) self.args.append("-createwalletbackups=0") @@ -104,6 +113,8 @@ class TestNode(): self.url = None self.log = logging.getLogger('TestFramework.node%d' % i) self.cleanup_on_exit = True # Whether to kill the node when this object goes away + # Cache perf subprocesses here by their data output filename. + self.perf_subprocesses = {} self.p2ps = [] @@ -179,6 +190,9 @@ class TestNode(): self.running = True self.log.debug("dashd started, waiting for RPC to come up") + if self.start_perf: + self._start_perf() + def wait_for_rpc_connection(self): """Sets up an RPC connection to the dashd process. Returns False if unable to connect.""" # Poll at a rate of four times per second @@ -227,6 +241,10 @@ class TestNode(): except http.client.CannotSendRequest: self.log.exception("Unable to stop node.") + # If there are any running perf processes, stop them. + for profile_name in tuple(self.perf_subprocesses.keys()): + self._stop_perf(profile_name) + # Check that stderr is as expected self.stderr.seek(0) stderr = self.stderr.read().decode('utf-8').strip() @@ -280,6 +298,84 @@ class TestNode(): if re.search(re.escape(expected_msg), log, flags=re.MULTILINE) is None: self._raise_assertion_error('Expected message "{}" does not partially match log:\n\n{}\n\n'.format(expected_msg, print_log)) + @contextlib.contextmanager + def profile_with_perf(self, profile_name): + """ + Context manager that allows easy profiling of node activity using `perf`. + + See `test/functional/README.md` for details on perf usage. + + Args: + profile_name (str): This string will be appended to the + profile data filename generated by perf. + """ + subp = self._start_perf(profile_name) + + yield + + if subp: + self._stop_perf(profile_name) + + def _start_perf(self, profile_name=None): + """Start a perf process to profile this node. + + Returns the subprocess running perf.""" + subp = None + + def test_success(cmd): + return subprocess.call( + # shell=True required for pipe use below + cmd, shell=True, + stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) == 0 + + if not sys.platform.startswith('linux'): + self.log.warning("Can't profile with perf; only availabe on Linux platforms") + return None + + if not test_success('which perf'): + self.log.warning("Can't profile with perf; must install perf-tools") + return None + + if not test_success('readelf -S {} | grep .debug_str'.format(shlex.quote(self.binary))): + self.log.warning( + "perf output won't be very useful without debug symbols compiled into bitcoind") + + output_path = tempfile.NamedTemporaryFile( + dir=self.datadir, + prefix="{}.perf.data.".format(profile_name or 'test'), + delete=False, + ).name + + cmd = [ + 'perf', 'record', + '-g', # Record the callgraph. + '--call-graph', 'dwarf', # Compatibility for gcc's --fomit-frame-pointer. + '-F', '101', # Sampling frequency in Hz. + '-p', str(self.process.pid), + '-o', output_path, + ] + subp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + self.perf_subprocesses[profile_name] = subp + + return subp + + def _stop_perf(self, profile_name): + """Stop (and pop) a perf subprocess.""" + subp = self.perf_subprocesses.pop(profile_name) + output_path = subp.args[subp.args.index('-o') + 1] + + subp.terminate() + subp.wait(timeout=10) + + stderr = subp.stderr.read().decode() + if 'Consider tweaking /proc/sys/kernel/perf_event_paranoid' in stderr: + self.log.warning( + "perf couldn't collect data! Try " + "'sudo sysctl -w kernel.perf_event_paranoid=-1'") + else: + report_cmd = "perf report -i {}".format(output_path) + self.log.info("See perf output by running '{}'".format(report_cmd)) + def assert_start_raises_init_error(self, extra_args=None, expected_msg=None, match=ErrorMatch.FULL_TEXT, *args, **kwargs): """Attempt to start the node and expect it to raise an error. diff --git a/test/lint/lint-python-dead-code.sh b/test/lint/lint-python-dead-code.sh index 5c940a8b65..e5335dc5d3 100755 --- a/test/lint/lint-python-dead-code.sh +++ b/test/lint/lint-python-dead-code.sh @@ -15,5 +15,5 @@ fi vulture \ --min-confidence 60 \ - --ignore-names "argtypes,connection_lost,connection_made,converter,data_received,daemon,errcheck,get_ecdh_key,get_privkey,is_compressed,is_fullyvalid,msg_generic,on_*,optionxform,restype,set_privkey,*serialize_v2" \ + --ignore-names "argtypes,connection_lost,connection_made,converter,data_received,daemon,errcheck,get_ecdh_key,get_privkey,is_compressed,is_fullyvalid,msg_generic,on_*,optionxform,restype,set_privkey,*serialize_v2,profile_with_perf" \ $(git ls-files -- "*.py" ":(exclude)contrib/" ":(exclude)src/crc32c/" ":(exclude)test/functional/test_framework/address.py")