From 8b3a4867027f2d637107972c8736d0944f2775a1 Mon Sep 17 00:00:00 2001 From: "W. J. van der Laan" Date: Fri, 24 Sep 2021 17:10:06 +0200 Subject: [PATCH] Merge bitcoin/bitcoin#23025: bench: update nanobench add `-min_time` e148a5233292d156cda76cb20afb6641fc20f25e bench: fixed ubsan implicit conversion (Martin Ankerl) da4e2f1da0388d424659fa8c853fcaf37b4b5959 bench: various args improvements (Jon Atack) d312fd94a1083cdbf071f2888aab43c62d358151 bench: clean up includes (Jon Atack) 1f10f1663e53474038b9111c4264a250cffe7501 bench: add usage description and documentation (Martin Ankerl) d3c6f8bfa12f78635752878b28e66cec0c85d4a9 bench: introduce -min_time argument (Martin Ankerl) 9fef8329322277d9c14c8df1867cb3c61477c431 bench: make EvictionProtection.* work with any number of iterations (Martin Ankerl) 153e6860e84df0a3d52e5a3b2fe9c37b5e0b029a bench: change AddrManGood to AddrManAddThenGood (Martin Ankerl) 468b232f71562280aae16876bc257ec24f5fcccb bench: remove unnecessary & incorrect multiplication in MuHashDiv (Martin Ankerl) eed99cf272426e5957bee35dc8e7d0798aec8ec0 bench: update nanobench from 4.3.4 to 4.3.6 (Martin Ankerl) Pull request description: This PR updates the nanobench with the latest release from upstream, v4.3.6. It fixes the missing performance counters. Due to discussions on #22999 I have done some work that should make the benchmark results more reliable. It introduces a new flag `-min_time` that allows running a benchmark for much longer than the default. When results are unreliable, choosing a large timeframe here should usually get repeatable results even when frequency scaling cannot be disabled. The default is now 10ms. For this to work I have changed the `AddrManGood` and `EvictionProtection` benchmarks so they work with any number of iterations. 
Also, this adds more usage documentation to `bench_bitcoin -h` and I've cherry-picked two changes from #22999 authored by Jon Atack ACKs for top commit: jonatack: re-ACK e148a5233292d156cda76cb20afb6641fc20f25e laanwj: Code review ACK e148a5233292d156cda76cb20afb6641fc20f25e Tree-SHA512: 2da6de19a5c85ac234b190025e195c727546166dbb75e3f9267e667a73677ba1e29b7765877418a42b1407b65df901e0130763936525e6f1450f18f08837c40c --- src/bench/bench.cpp | 15 +++++++-- src/bench/bench.h | 3 +- src/bench/bench_bitcoin.cpp | 64 ++++++++++++++++++++++++++++++++++--- src/bench/crypto_hash.cpp | 8 ++--- src/bench/peer_eviction.cpp | 10 +++--- src/bench/rollingbloom.cpp | 16 +++++----- 6 files changed, 88 insertions(+), 28 deletions(-) diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 02ae5de7fd..d6ed76a1f4 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -4,15 +4,20 @@ #include -#include #include #include -#include #include +#include #include +#include #include +#include #include +#include +#include + +using namespace std::chrono_literals; const std::function G_TEST_LOG_FUN{}; @@ -66,6 +71,12 @@ void benchmark::BenchRunner::RunAll(const Args& args) Bench bench; bench.name(p.first); + if (args.min_time > 0ms) { + // convert to nanos before dividing to reduce rounding errors + std::chrono::nanoseconds min_time_ns = args.min_time; + bench.minEpochTime(min_time_ns / bench.epochs()); + } + if (args.asymptote.empty()) { p.second(bench); } else { diff --git a/src/bench/bench.h b/src/bench/bench.h index 6804273d52..4a838d1fe7 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -42,11 +42,12 @@ using ankerl::nanobench::Bench; typedef std::function BenchFunction; struct Args { - std::string regex_filter; bool is_list_only; + std::chrono::milliseconds min_time; std::vector asymptote; fs::path output_csv; fs::path output_json; + std::string regex_filter; }; class BenchRunner diff --git a/src/bench/bench_bitcoin.cpp b/src/bench/bench_bitcoin.cpp index 
07bc619bd6..b12707dbf1 100644 --- a/src/bench/bench_bitcoin.cpp +++ b/src/bench/bench_bitcoin.cpp @@ -4,6 +4,7 @@ #include +#include #include #include #include @@ -11,16 +12,23 @@ #include #include +#include +#include +#include +#include +#include static const char* DEFAULT_BENCH_FILTER = ".*"; +static constexpr int64_t DEFAULT_MIN_TIME_MS{10}; static void SetupBenchArgs(ArgsManager& argsman) { SetupHelpOptions(argsman); - argsman.AddArg("-asymptote=n1,n2,n3,...", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); + argsman.AddArg("-asymptote=", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); argsman.AddArg("-filter=", strprintf("Regular expression filter to select benchmark by name (default: %s)", DEFAULT_BENCH_FILTER), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); - argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); + argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_BOOL, OptionsCategory::OPTIONS); + argsman.AddArg("-min_time=", strprintf("Minimum runtime per benchmark, in milliseconds (default: %d)", DEFAULT_MIN_TIME_MS), ArgsManager::ALLOW_INT, OptionsCategory::OPTIONS); argsman.AddArg("-output_csv=", "Generate CSV file with the most important benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); argsman.AddArg("-output_json=", "Generate JSON file with all benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS); } @@ -50,16 +58,62 @@ int main(int argc, char** argv) } if (HelpRequested(argsman)) { - std::cout << argsman.GetHelpMessage(); + std::cout << "Usage: bench_dash [options]\n" + "\n" + << argsman.GetHelpMessage() + << "Description:\n" + "\n" + " bench_dash executes microbenchmarks. 
The quality of the benchmark results\n" + " highly depend on the stability of the machine. It can sometimes be difficult\n" + " to get stable, repeatable results, so here are a few tips:\n" + "\n" + " * Use pyperf [1] to disable frequency scaling, turbo boost etc. For best\n" + " results, use CPU pinning and CPU isolation (see [2]).\n" + "\n" + " * Each call of run() should do exactly the same work. E.g. inserting into\n" + " a std::vector doesn't do that as it will reallocate on certain calls. Make\n" + " sure each run has exactly the same preconditions.\n" + "\n" + " * If results are still not reliable, increase runtime with e.g.\n" + " -min_time=5000 to let a benchmark run for at least 5 seconds.\n" + "\n" + " * bench_dash uses nanobench [3] for which there is extensive\n" + " documentation available online.\n" + "\n" + "Environment Variables:\n" + "\n" + " To attach a profiler you can run a benchmark in endless mode. This can be\n" + " done with the environment variable NANOBENCH_ENDLESS. E.g. like so:\n" + "\n" + " NANOBENCH_ENDLESS=MuHash ./bench_dash -filter=MuHash\n" + "\n" + " In rare cases it can be useful to suppress stability warnings. This can be\n" + " done with the environment variable NANOBENCH_SUPPRESS_WARNINGS, e.g:\n" + "\n" + " NANOBENCH_SUPPRESS_WARNINGS=1 ./bench_dash\n" + "\n" + "Notes:\n" + "\n" + " 1. pyperf\n" + " https://github.com/psf/pyperf\n" + "\n" + " 2. CPU pinning & isolation\n" + " https://pyperf.readthedocs.io/en/latest/system.html\n" + "\n" + " 3. 
nanobench\n" + " https://github.com/martinus/nanobench\n" + "\n"; + return EXIT_SUCCESS; } benchmark::Args args; - args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER); - args.is_list_only = argsman.GetBoolArg("-list", false); args.asymptote = parseAsymptote(argsman.GetArg("-asymptote", "")); + args.is_list_only = argsman.GetBoolArg("-list", false); + args.min_time = std::chrono::milliseconds(argsman.GetArg("-min_time", DEFAULT_MIN_TIME_MS)); args.output_csv = fs::PathFromString(argsman.GetArg("-output_csv", "")); args.output_json = fs::PathFromString(argsman.GetArg("-output_json", "")); + args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER); benchmark::BenchRunner::RunAll(args); diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index cb00185b2d..056ccf0e19 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -249,9 +249,9 @@ static void MuHash(benchmark::Bench& bench) { MuHash3072 acc; unsigned char key[32] = {0}; - int i = 0; + uint32_t i = 0; bench.run([&] { - key[0] = ++i; + key[0] = ++i & 0xFF; acc *= MuHash3072(key); }); } @@ -273,10 +273,6 @@ static void MuHashDiv(benchmark::Bench& bench) FastRandomContext rng(true); MuHash3072 muhash{rng.randbytes(32)}; - for (size_t i = 0; i < bench.epochIterations(); ++i) { - acc *= muhash; - } - bench.run([&] { acc /= muhash; }); diff --git a/src/bench/peer_eviction.cpp b/src/bench/peer_eviction.cpp index d5086bff85..f05f5e8f64 100644 --- a/src/bench/peer_eviction.cpp +++ b/src/bench/peer_eviction.cpp @@ -20,19 +20,17 @@ static void EvictionProtectionCommon( { using Candidates = std::vector; FastRandomContext random_context{true}; - bench.warmup(100).epochIterations(1100); Candidates candidates{GetRandomNodeEvictionCandidates(num_candidates, random_context)}; for (auto& c : candidates) { candidate_setup_fn(c); } - std::vector copies{ - static_cast(bench.epochs() * bench.epochIterations()), candidates}; - size_t i{0}; + bench.run([&] { - 
ProtectEvictionCandidatesByRatio(copies.at(i)); - ++i; + // creating a copy has an overhead of about 3%, so it does not influence the benchmark results much. + auto copy = candidates; + ProtectEvictionCandidatesByRatio(copy); }); } diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp index 997ab56549..28167767db 100644 --- a/src/bench/rollingbloom.cpp +++ b/src/bench/rollingbloom.cpp @@ -13,16 +13,16 @@ static void RollingBloom(benchmark::Bench& bench) uint32_t count = 0; bench.run([&] { count++; - data[0] = count; - data[1] = count >> 8; - data[2] = count >> 16; - data[3] = count >> 24; + data[0] = count & 0xFF; + data[1] = (count >> 8) & 0xFF; + data[2] = (count >> 16) & 0xFF; + data[3] = (count >> 24) & 0xFF; filter.insert(data); - data[0] = count >> 24; - data[1] = count >> 16; - data[2] = count >> 8; - data[3] = count; + data[0] = (count >> 24) & 0xFF; + data[1] = (count >> 16) & 0xFF; + data[2] = (count >> 8) & 0xFF; + data[3] = count & 0xFF; filter.contains(data); }); }