diff --git a/src/bench/base58.cpp b/src/bench/base58.cpp index 2d9a9f2908..294fcc3c33 100644 --- a/src/bench/base58.cpp +++ b/src/bench/base58.cpp @@ -54,6 +54,6 @@ static void Base58Decode(benchmark::State& state) } -BENCHMARK(Base58Encode); -BENCHMARK(Base58CheckEncode); -BENCHMARK(Base58Decode); +BENCHMARK(Base58Encode, 470 * 1000); +BENCHMARK(Base58CheckEncode, 320 * 1000); +BENCHMARK(Base58Decode, 800 * 1000); diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 54fb8c7106..9ff7b4e156 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -8,102 +8,139 @@ #include #include #include +#include +#include +#include -benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { - static std::map benchmarks_map; +void benchmark::ConsolePrinter::header() +{ + std::cout << "# Benchmark, evals, iterations, total, min, max, median" << std::endl; +} + +void benchmark::ConsolePrinter::result(const State& state) +{ + auto results = state.m_elapsed_results; + std::sort(results.begin(), results.end()); + + double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0); + + double front = 0; + double back = 0; + double median = 0; + + if (!results.empty()) { + front = results.front(); + back = results.back(); + + size_t mid = results.size() / 2; + median = results[mid]; + if (0 == results.size() % 2) { + median = (results[mid - 1] + results[mid]) / 2; // even count: average the two middle samples; mid >= 1 here, and mid + 1 could index past the end + } + } + + std::cout << std::setprecision(6); + std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl; +} + +void benchmark::ConsolePrinter::footer() {} +benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height) + : m_plotly_url(plotly_url), m_width(width), m_height(height) +{ +} + +void benchmark::PlotlyPrinter::header() +{ + std::cout << "" + << "" + << "
" + << ""; +} + + +benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks() +{ + static std::map benchmarks_map; return benchmarks_map; } -benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) +benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second) { - benchmarks().insert(std::make_pair(name, func)); + benchmarks().insert(std::make_pair(name, Bench{func, num_iters_for_one_second})); } -void -benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) +void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only) { perf_init(); - if (std::ratio_less_equal::value) { + if (!std::ratio_less_equal::value) { std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; } #ifdef DEBUG std::cerr << "WARNING: This is a debug build - may result in slower benchmarks.\n"; #endif - std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << "," - << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; + std::regex reFilter(filter); + std::smatch baseMatch; - for (const auto &p: benchmarks()) { - State state(p.first, elapsedTimeForOne); - p.second(state); + printer.header(); + + for (const auto& p : benchmarks()) { + if (!std::regex_match(p.first, baseMatch, reFilter)) { + continue; + } + + uint64_t num_iters = static_cast(p.second.num_iters_for_one_second * scaling); + if (0 == num_iters) { + num_iters = 1; + } + State state(p.first, num_evals, num_iters, printer); + if (!is_list_only) { + p.second.func(state); + } + printer.result(state); } + + printer.footer(); + perf_fini(); } -bool benchmark::State::KeepRunning() +bool benchmark::State::UpdateTimer(const benchmark::time_point current_time) { - if (count & countMask) { - ++count; - return true; - } - time_point now; + if 
(m_start_time != time_point()) { + std::chrono::duration diff = current_time - m_start_time; + m_elapsed_results.push_back(diff.count() / m_num_iters); - uint64_t nowCycles; - if (count == 0) { - lastTime = beginTime = now = clock::now(); - lastCycles = beginCycles = nowCycles = perf_cpucycles(); - } - else { - now = clock::now(); - auto elapsed = now - lastTime; - auto elapsedOne = elapsed / (countMask + 1); - if (elapsedOne < minTime) minTime = elapsedOne; - if (elapsedOne > maxTime) maxTime = elapsedOne; - - // We only use relative values, so don't have to handle 64-bit wrap-around specially - nowCycles = perf_cpucycles(); - uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); - if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; - if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; - - if (elapsed*128 < maxElapsed) { - // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. - // The restart avoids including the overhead of this code in the measurement. - countMask = ((countMask<<3)|7) & ((1LL<<60)-1); - count = 0; - minTime = duration::max(); - maxTime = duration::zero(); - minCycles = std::numeric_limits::max(); - maxCycles = std::numeric_limits::min(); - return true; - } - if (elapsed*16 < maxElapsed) { - uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1); - if ((count & newCountMask)==0) { - countMask = newCountMask; - } + if (m_elapsed_results.size() == m_num_evals) { + return false; } } - lastTime = now; - lastCycles = nowCycles; - ++count; - if (now - beginTime < maxElapsed) return true; // Keep going - - --count; - - assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above"); - - // Output results - // Duration casts are only necessary here because hardware with sub-nanosecond clocks - // will lose precision. 
- int64_t min_elapsed = std::chrono::duration_cast(minTime).count(); - int64_t max_elapsed = std::chrono::duration_cast(maxTime).count(); - int64_t avg_elapsed = std::chrono::duration_cast((now-beginTime)/count).count(); - int64_t averageCycles = (nowCycles-beginCycles)/count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << "," - << minCycles << "," << maxCycles << "," << averageCycles << "\n"; - std::cout.copyfmt(std::ios(nullptr)); - - return false; + m_num_iters_left = m_num_iters - 1; + return true; } diff --git a/src/bench/bench.h b/src/bench/bench.h index 15f65e3522..452d83f5a9 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -32,64 +33,110 @@ static void CODE_TO_TIME(benchmark::State& state) ... do any cleanup needed... } -BENCHMARK(CODE_TO_TIME); +// default to running benchmark for 5000 iterations +BENCHMARK(CODE_TO_TIME, 5000); */ - + namespace benchmark { - // In case high_resolution_clock is steady, prefer that, otherwise use steady_clock. - struct best_clock { - using hi_res_clock = std::chrono::high_resolution_clock; - using steady_clock = std::chrono::steady_clock; - using type = std::conditional::type; - }; - using clock = best_clock::type; - using time_point = clock::time_point; - using duration = clock::duration; +// In case high_resolution_clock is steady, prefer that, otherwise use steady_clock. 
+struct best_clock { + using hi_res_clock = std::chrono::high_resolution_clock; + using steady_clock = std::chrono::steady_clock; + using type = std::conditional::type; +}; +using clock = best_clock::type; +using time_point = clock::time_point; +using duration = clock::duration; - class State { - std::string name; - duration maxElapsed; - time_point beginTime, lastTime; - duration minTime, maxTime; - uint64_t count; - uint64_t countMask; - uint64_t beginCycles; - uint64_t lastCycles; - uint64_t minCycles; - uint64_t maxCycles; - public: - State(std::string _name, duration _maxElapsed) : - name(_name), - maxElapsed(_maxElapsed), - minTime(duration::max()), - maxTime(duration::zero()), - count(0), - countMask(1), - beginCycles(0), - lastCycles(0), - minCycles(std::numeric_limits::max()), - maxCycles(std::numeric_limits::min()) { - } - bool KeepRunning(); - }; +class Printer; - typedef std::function BenchFunction; +class State +{ +public: + std::string m_name; + uint64_t m_num_iters_left; + const uint64_t m_num_iters; + const uint64_t m_num_evals; + std::vector m_elapsed_results; + time_point m_start_time; - class BenchRunner + bool UpdateTimer(time_point finish_time); + + State(std::string name, uint64_t num_evals, double num_iters, Printer& printer) : m_name(name), m_num_iters_left(0), m_num_iters(num_iters), m_num_evals(num_evals) { - typedef std::map BenchmarkMap; - static BenchmarkMap &benchmarks(); + } - public: - BenchRunner(std::string name, BenchFunction func); + inline bool KeepRunning() + { + if (m_num_iters_left--) { + return true; + } - static void RunAll(duration elapsedTimeForOne = std::chrono::seconds(1)); + bool result = UpdateTimer(clock::now()); + // measure again so runtime of UpdateTimer is not included + m_start_time = clock::now(); + return result; + } +}; + +typedef std::function BenchFunction; + +class BenchRunner +{ + struct Bench { + BenchFunction func; + uint64_t num_iters_for_one_second; }; + typedef std::map BenchmarkMap; + static 
BenchmarkMap& benchmarks(); + +public: + BenchRunner(std::string name, BenchFunction func, uint64_t num_iters_for_one_second); + + static void RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only); +}; + +// interface to output benchmark results. +class Printer +{ +public: + virtual ~Printer() {} + virtual void header() = 0; + virtual void result(const State& state) = 0; + virtual void footer() = 0; +}; + +// default printer to console, shows min, max, median. +class ConsolePrinter : public Printer +{ +public: + void header(); + void result(const State& state); + void footer(); +}; + +// creates box plot with plotly.js +class PlotlyPrinter : public Printer +{ +public: + PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height); + void header(); + void result(const State& state); + void footer(); + +private: + std::string m_plotly_url; + int64_t m_width; + int64_t m_height; +}; } -// BENCHMARK(foo) expands to: benchmark::BenchRunner bench_11foo("foo", foo); -#define BENCHMARK(n) \ - benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n); + +// BENCHMARK(foo, num_iters_for_one_second) expands to: benchmark::BenchRunner bench_11foo("foo", foo, (num_iters_for_one_second)); +// Choose a num_iters_for_one_second that takes roughly 1 second. The goal is that all benchmarks should take approximately +// the same time; a scaling factor can then be used so that the total time is appropriate for your system. 
+#define BENCHMARK(n, num_iters_for_one_second) \ + benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n, (num_iters_for_one_second)); #endif // BITCOIN_BENCH_BENCH_H diff --git a/src/bench/bench_dash.cpp b/src/bench/bench_dash.cpp index 5b584787b9..ad25cd2eef 100644 --- a/src/bench/bench_dash.cpp +++ b/src/bench/bench_dash.cpp @@ -11,8 +11,20 @@ #include #include +#include + +#include + #include +static const int64_t DEFAULT_BENCH_EVALUATIONS = 5; +static const char* DEFAULT_BENCH_FILTER = ".*"; +static const char* DEFAULT_BENCH_SCALING = "1.0"; +static const char* DEFAULT_BENCH_PRINTER = "console"; +static const char* DEFAULT_PLOT_PLOTLYURL = "https://cdn.plot.ly/plotly-latest.min.js"; +static const int64_t DEFAULT_PLOT_WIDTH = 1024; +static const int64_t DEFAULT_PLOT_HEIGHT = 768; + void InitBLSTests(); void CleanupBLSTests(); void CleanupBLSDkgTests(); @@ -20,6 +32,23 @@ void CleanupBLSDkgTests(); int main(int argc, char** argv) { + gArgs.ParseParameters(argc, argv); + + if (gArgs.IsArgSet("-?") || gArgs.IsArgSet("-h") || gArgs.IsArgSet("-help")) { + std::cout << HelpMessageGroup(_("Options:")) + << HelpMessageOpt("-?", _("Print this help message and exit")) + << HelpMessageOpt("-list", _("List benchmarks without executing them. Can be combined with -scaling and -filter")) + << HelpMessageOpt("-evals=", strprintf(_("Number of measurement evaluations to perform. (default: %u)"), DEFAULT_BENCH_EVALUATIONS)) + << HelpMessageOpt("-filter=", strprintf(_("Regular expression filter to select benchmark by name (default: %s)"), DEFAULT_BENCH_FILTER)) + << HelpMessageOpt("-scaling=", strprintf(_("Scaling factor for benchmark's runtime (default: %u)"), DEFAULT_BENCH_SCALING)) + << HelpMessageOpt("-printer=(console|plot)", strprintf(_("Choose printer format. console: print data to console. 
plot: Print results as HTML graph (default: %s)"), DEFAULT_BENCH_PRINTER)) + << HelpMessageOpt("-plot-plotlyurl=", strprintf(_("URL to use for plotly.js (default: %s)"), DEFAULT_PLOT_PLOTLYURL)) + << HelpMessageOpt("-plot-width=", strprintf(_("Plot width in pixel (default: %u)"), DEFAULT_PLOT_WIDTH)) + << HelpMessageOpt("-plot-height=", strprintf(_("Plot height in pixel (default: %u)"), DEFAULT_PLOT_HEIGHT)); + + return 0; + } + SHA256AutoDetect(); RegisterPrettySignalHandlers(); @@ -34,7 +63,24 @@ main(int argc, char** argv) SetupEnvironment(); fPrintToDebugLog = false; // don't want to write to debug.log file - benchmark::BenchRunner::RunAll(); + int64_t evaluations = gArgs.GetArg("-evals", DEFAULT_BENCH_EVALUATIONS); + std::string regex_filter = gArgs.GetArg("-filter", DEFAULT_BENCH_FILTER); + std::string scaling_str = gArgs.GetArg("-scaling", DEFAULT_BENCH_SCALING); + bool is_list_only = gArgs.GetBoolArg("-list", false); + + double scaling_factor = boost::lexical_cast(scaling_str); + + + std::unique_ptr printer(new benchmark::ConsolePrinter()); + std::string printer_arg = gArgs.GetArg("-printer", DEFAULT_BENCH_PRINTER); + if ("plot" == printer_arg) { + printer.reset(new benchmark::PlotlyPrinter( + gArgs.GetArg("-plot-plotlyurl", DEFAULT_PLOT_PLOTLYURL), + gArgs.GetArg("-plot-width", DEFAULT_PLOT_WIDTH), + gArgs.GetArg("-plot-height", DEFAULT_PLOT_HEIGHT))); + } + + benchmark::BenchRunner::RunAll(*printer, evaluations, scaling_factor, regex_filter, is_list_only); // need to be called before global destructors kick in (PoolAllocator is needed due to many BLSSecretKeys) CleanupBLSDkgTests(); diff --git a/src/bench/bls.cpp b/src/bench/bls.cpp index a0506ec7e8..f11d976078 100644 --- a/src/bench/bls.cpp +++ b/src/bench/bls.cpp @@ -348,15 +348,15 @@ static void BLSVerify_BatchedParallel(benchmark::State& state) } } -BENCHMARK(BLSPubKeyAggregate_Normal) -BENCHMARK(BLSSecKeyAggregate_Normal) -BENCHMARK(BLSSign_Normal) -BENCHMARK(BLSVerify_Normal) 
-BENCHMARK(BLSVerify_LargeBlock1000) -BENCHMARK(BLSVerify_LargeBlockSelfAggregated1000) -BENCHMARK(BLSVerify_LargeBlockSelfAggregated10000) -BENCHMARK(BLSVerify_LargeAggregatedBlock1000) -BENCHMARK(BLSVerify_LargeAggregatedBlock10000) -BENCHMARK(BLSVerify_LargeAggregatedBlock1000PreVerified) -BENCHMARK(BLSVerify_Batched) -BENCHMARK(BLSVerify_BatchedParallel) +BENCHMARK(BLSPubKeyAggregate_Normal, 300 * 1000) +BENCHMARK(BLSSecKeyAggregate_Normal, 700 * 1000) +BENCHMARK(BLSSign_Normal, 600) +BENCHMARK(BLSVerify_Normal, 350) +BENCHMARK(BLSVerify_LargeBlock1000, 1) +BENCHMARK(BLSVerify_LargeBlockSelfAggregated1000, 1) +BENCHMARK(BLSVerify_LargeBlockSelfAggregated10000, 1) +BENCHMARK(BLSVerify_LargeAggregatedBlock1000, 1) +BENCHMARK(BLSVerify_LargeAggregatedBlock10000, 1) +BENCHMARK(BLSVerify_LargeAggregatedBlock1000PreVerified, 5) +BENCHMARK(BLSVerify_Batched, 500) +BENCHMARK(BLSVerify_BatchedParallel, 1000) diff --git a/src/bench/bls_dkg.cpp b/src/bench/bls_dkg.cpp index 0ae210533e..031304d11a 100644 --- a/src/bench/bls_dkg.cpp +++ b/src/bench/bls_dkg.cpp @@ -137,45 +137,45 @@ void CleanupBLSDkgTests() -#define BENCH_BuildQuorumVerificationVectors(name, quorumSize, parallel) \ +#define BENCH_BuildQuorumVerificationVectors(name, quorumSize, parallel, num_iters_for_one_second) \ static void BLSDKG_BuildQuorumVerificationVectors_##name##_##quorumSize(benchmark::State& state) \ { \ InitIfNeeded(); \ dkg##quorumSize->Bench_BuildQuorumVerificationVectors(state, parallel); \ } \ - BENCHMARK(BLSDKG_BuildQuorumVerificationVectors_##name##_##quorumSize) + BENCHMARK(BLSDKG_BuildQuorumVerificationVectors_##name##_##quorumSize, num_iters_for_one_second) -BENCH_BuildQuorumVerificationVectors(simple, 10, false) -BENCH_BuildQuorumVerificationVectors(simple, 100, false) -BENCH_BuildQuorumVerificationVectors(simple, 400, false) -BENCH_BuildQuorumVerificationVectors(parallel, 10, true) -BENCH_BuildQuorumVerificationVectors(parallel, 100, true) 
-BENCH_BuildQuorumVerificationVectors(parallel, 400, true) +BENCH_BuildQuorumVerificationVectors(simple, 10, false, 2500) +BENCH_BuildQuorumVerificationVectors(simple, 100, false, 25) +BENCH_BuildQuorumVerificationVectors(simple, 400, false, 1) +BENCH_BuildQuorumVerificationVectors(parallel, 10, true, 3000) +BENCH_BuildQuorumVerificationVectors(parallel, 100, true, 50) +BENCH_BuildQuorumVerificationVectors(parallel, 400, true, 3) /////////////////////////////// -#define BENCH_VerifyContributionShares(name, quorumSize, invalidCount, parallel, aggregated) \ +#define BENCH_VerifyContributionShares(name, quorumSize, invalidCount, parallel, aggregated, num_iters_for_one_second) \ static void BLSDKG_VerifyContributionShares_##name##_##quorumSize(benchmark::State& state) \ { \ InitIfNeeded(); \ dkg##quorumSize->Bench_VerifyContributionShares(state, invalidCount, parallel, aggregated); \ } \ - BENCHMARK(BLSDKG_VerifyContributionShares_##name##_##quorumSize) + BENCHMARK(BLSDKG_VerifyContributionShares_##name##_##quorumSize, num_iters_for_one_second) -BENCH_VerifyContributionShares(simple, 10, 5, false, false) -BENCH_VerifyContributionShares(simple, 100, 5, false, false) -BENCH_VerifyContributionShares(simple, 400, 5, false, false) +BENCH_VerifyContributionShares(simple, 10, 5, false, false, 70) +BENCH_VerifyContributionShares(simple, 100, 5, false, false, 1) +BENCH_VerifyContributionShares(simple, 400, 5, false, false, 1) -BENCH_VerifyContributionShares(aggregated, 10, 5, false, true) -BENCH_VerifyContributionShares(aggregated, 100, 5, false, true) -BENCH_VerifyContributionShares(aggregated, 400, 5, false, true) +BENCH_VerifyContributionShares(aggregated, 10, 5, false, true, 70) +BENCH_VerifyContributionShares(aggregated, 100, 5, false, true, 2) +BENCH_VerifyContributionShares(aggregated, 400, 5, false, true, 1) -BENCH_VerifyContributionShares(parallel, 10, 5, true, false) -BENCH_VerifyContributionShares(parallel, 100, 5, true, false) 
-BENCH_VerifyContributionShares(parallel, 400, 5, true, false) +BENCH_VerifyContributionShares(parallel, 10, 5, true, false, 200) +BENCH_VerifyContributionShares(parallel, 100, 5, true, false, 2) +BENCH_VerifyContributionShares(parallel, 400, 5, true, false, 1) -BENCH_VerifyContributionShares(parallel_aggregated, 10, 5, true, true) -BENCH_VerifyContributionShares(parallel_aggregated, 100, 5, true, true) -BENCH_VerifyContributionShares(parallel_aggregated, 400, 5, true, true) +BENCH_VerifyContributionShares(parallel_aggregated, 10, 5, true, true, 150) +BENCH_VerifyContributionShares(parallel_aggregated, 100, 5, true, true, 4) +BENCH_VerifyContributionShares(parallel_aggregated, 400, 5, true, true, 1) diff --git a/src/bench/ccoins_caching.cpp b/src/bench/ccoins_caching.cpp index 89ba3d3d21..1bce0fffbd 100644 --- a/src/bench/ccoins_caching.cpp +++ b/src/bench/ccoins_caching.cpp @@ -84,4 +84,4 @@ static void CCoinsCaching(benchmark::State& state) } } -BENCHMARK(CCoinsCaching); +BENCHMARK(CCoinsCaching, 170 * 1000); diff --git a/src/bench/chacha20.cpp b/src/bench/chacha20.cpp index 4c2c8d9605..69d8c96ec0 100644 --- a/src/bench/chacha20.cpp +++ b/src/bench/chacha20.cpp @@ -41,11 +41,6 @@ static void CHACHA20_1MB(benchmark::State& state) CHACHA20(state, BUFFER_SIZE_LARGE); } -//TODO add back below once benchmarking backports are done -//BENCHMARK(CHACHA20_64BYTES, 500000); -//BENCHMARK(CHACHA20_256BYTES, 250000); -//BENCHMARK(CHACHA20_1MB, 340); -BENCHMARK(CHACHA20_64BYTES); -BENCHMARK(CHACHA20_256BYTES); -BENCHMARK(CHACHA20_1MB); - +BENCHMARK(CHACHA20_64BYTES, 500000); +BENCHMARK(CHACHA20_256BYTES, 250000); +BENCHMARK(CHACHA20_1MB, 340); diff --git a/src/bench/chacha_poly_aead.cpp b/src/bench/chacha_poly_aead.cpp index 3f05a949bc..f5f7297490 100644 --- a/src/bench/chacha_poly_aead.cpp +++ b/src/bench/chacha_poly_aead.cpp @@ -112,13 +112,12 @@ static void HASH_1MB(benchmark::State& state) HASH(state, BUFFER_SIZE_LARGE); } -//TODO add back below once benchmark backports 
are done -BENCHMARK(CHACHA20_POLY1305_AEAD_64BYTES_ONLY_ENCRYPT/*, 500000*/); -BENCHMARK(CHACHA20_POLY1305_AEAD_256BYTES_ONLY_ENCRYPT/*, 250000*/); -BENCHMARK(CHACHA20_POLY1305_AEAD_1MB_ONLY_ENCRYPT/*, 340*/); -BENCHMARK(CHACHA20_POLY1305_AEAD_64BYTES_ENCRYPT_DECRYPT/*, 500000*/); -BENCHMARK(CHACHA20_POLY1305_AEAD_256BYTES_ENCRYPT_DECRYPT/*, 250000*/); -BENCHMARK(CHACHA20_POLY1305_AEAD_1MB_ENCRYPT_DECRYPT/*, 340*/); -BENCHMARK(HASH_64BYTES/*, 500000*/); -BENCHMARK(HASH_256BYTES/*, 250000*/); -BENCHMARK(HASH_1MB/*, 340*/); +BENCHMARK(CHACHA20_POLY1305_AEAD_64BYTES_ONLY_ENCRYPT, 500000); +BENCHMARK(CHACHA20_POLY1305_AEAD_256BYTES_ONLY_ENCRYPT, 250000); +BENCHMARK(CHACHA20_POLY1305_AEAD_1MB_ONLY_ENCRYPT, 340); +BENCHMARK(CHACHA20_POLY1305_AEAD_64BYTES_ENCRYPT_DECRYPT, 500000); +BENCHMARK(CHACHA20_POLY1305_AEAD_256BYTES_ENCRYPT_DECRYPT, 250000); +BENCHMARK(CHACHA20_POLY1305_AEAD_1MB_ENCRYPT_DECRYPT, 340); +BENCHMARK(HASH_64BYTES, 500000); +BENCHMARK(HASH_256BYTES, 250000); +BENCHMARK(HASH_1MB, 340); diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 8da9593104..33cf3a35f3 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -50,5 +50,5 @@ static void DeserializeAndCheckBlockTest(benchmark::State& state) } } -BENCHMARK(DeserializeBlockTest); -BENCHMARK(DeserializeAndCheckBlockTest); +BENCHMARK(DeserializeBlockTest, 130); +BENCHMARK(DeserializeAndCheckBlockTest, 160); diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp index 35750aa1b6..4d41e28db6 100644 --- a/src/bench/checkqueue.cpp +++ b/src/bench/checkqueue.cpp @@ -12,51 +12,11 @@ #include -// This Benchmark tests the CheckQueue with the lightest -// weight Checks, so it should make any lock contention -// particularly visible static const int MIN_CORES = 2; static const size_t BATCHES = 101; static const size_t BATCH_SIZE = 30; static const int PREVECTOR_SIZE = 28; static const unsigned int QUEUE_BATCH_SIZE = 128; -static void CCheckQueueSpeed(benchmark::State& 
state) -{ - struct FakeJobNoWork { - bool operator()() - { - return true; - } - void swap(FakeJobNoWork& x){}; - }; - CCheckQueue queue {QUEUE_BATCH_SIZE}; - boost::thread_group tg; - for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { - tg.create_thread([&]{queue.Thread();}); - } - while (state.KeepRunning()) { - CCheckQueueControl control(&queue); - - // We call Add a number of times to simulate the behavior of adding - // a block of transactions at once. - - std::vector> vBatches(BATCHES); - for (auto& vChecks : vBatches) { - vChecks.resize(BATCH_SIZE); - } - for (auto& vChecks : vBatches) { - // We can't make vChecks in the inner loop because we want to measure - // the cost of getting the memory to each thread and we might get the same - // memory - control.Add(vChecks); - } - // control waits for completion by RAII, but - // it is done explicitly here for clarity - control.Wait(); - } - tg.interrupt_all(); - tg.join_all(); -} // This Benchmark tests the CheckQueue with a slightly realistic workload, // where checks all contain a prevector that is indirect 50% of the time @@ -99,5 +59,4 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State& state) tg.interrupt_all(); tg.join_all(); } -BENCHMARK(CCheckQueueSpeed); -BENCHMARK(CCheckQueueSpeedPrevectorJob); +BENCHMARK(CCheckQueueSpeedPrevectorJob, 1400); diff --git a/src/bench/coin_selection.cpp b/src/bench/coin_selection.cpp index fb9ee30ff3..f14654307c 100644 --- a/src/bench/coin_selection.cpp +++ b/src/bench/coin_selection.cpp @@ -57,4 +57,4 @@ static void CoinSelection(benchmark::State& state) } } -BENCHMARK(CoinSelection); +BENCHMARK(CoinSelection, 650); diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index c36a99d5a9..584702c850 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -47,9 +47,9 @@ static void HASH_SHA256_0032b(benchmark::State& state) { std::vector in(32,0); while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - 
CSHA256().Write(in.data(), in.size()).Finalize(in.data()); - } + CSHA256() + .Write(in.data(), in.size()) + .Finalize(in.data()); } } @@ -65,9 +65,7 @@ static void HASH_DSHA256_0032b(benchmark::State& state) { std::vector in(32,0); while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - CHash256().Write(in.data(), in.size()).Finalize(in.data()); - } + CHash256().Write(in.data(), in.size()).Finalize(in.data()); } } @@ -90,10 +88,9 @@ static void HASH_SHA512(benchmark::State& state) static void HASH_SipHash_0032b(benchmark::State& state) { uint256 x; + uint64_t k1 = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - *((uint64_t*)x.begin()) = SipHashUint256(0, i, x); - } + *((uint64_t*)x.begin()) = SipHashUint256(0, ++k1, x); } } @@ -102,9 +99,7 @@ static void FastRandom_32bit(benchmark::State& state) FastRandomContext rng(true); uint32_t x = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - x += rng.rand32(); - } + x += rng.rand32(); } } @@ -113,9 +108,7 @@ static void FastRandom_1bit(benchmark::State& state) FastRandomContext rng(true); uint32_t x = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - x += rng.randbool(); - } + x += rng.randbool(); } } @@ -217,29 +210,29 @@ static void HASH_X11_2048b_single(benchmark::State& state) hash = HashX11(in.begin(), in.end()); } -BENCHMARK(HASH_RIPEMD160); -BENCHMARK(HASH_SHA1); -BENCHMARK(HASH_SHA256); -BENCHMARK(HASH_DSHA256); -BENCHMARK(HASH_SHA512); -BENCHMARK(HASH_X11); +BENCHMARK(HASH_RIPEMD160, 440); +BENCHMARK(HASH_SHA1, 570); +BENCHMARK(HASH_SHA256, 340); +BENCHMARK(HASH_DSHA256, 340); +BENCHMARK(HASH_SHA512, 330); +BENCHMARK(HASH_X11, 500); -BENCHMARK(HASH_SHA256_0032b); -BENCHMARK(HASH_DSHA256_0032b); -BENCHMARK(HASH_SipHash_0032b); -BENCHMARK(HASH_SHA256D64_1024/*, 7400*/); +BENCHMARK(HASH_SHA256_0032b, 4 * 1000 * 1000); +BENCHMARK(HASH_DSHA256_0032b, 2 * 1000 * 1000); +BENCHMARK(HASH_SipHash_0032b, 35 * 1000 * 1000); 
+BENCHMARK(HASH_SHA256D64_1024, 7400); -BENCHMARK(HASH_DSHA256_0032b_single); -BENCHMARK(HASH_DSHA256_0080b_single); -BENCHMARK(HASH_DSHA256_0128b_single); -BENCHMARK(HASH_DSHA256_0512b_single); -BENCHMARK(HASH_DSHA256_1024b_single); -BENCHMARK(HASH_DSHA256_2048b_single); -BENCHMARK(HASH_X11_0032b_single); -BENCHMARK(HASH_X11_0080b_single); -BENCHMARK(HASH_X11_0128b_single); -BENCHMARK(HASH_X11_0512b_single); -BENCHMARK(HASH_X11_1024b_single); -BENCHMARK(HASH_X11_2048b_single); -BENCHMARK(FastRandom_32bit); -BENCHMARK(FastRandom_1bit); +BENCHMARK(HASH_DSHA256_0032b_single, 2000 * 1000); +BENCHMARK(HASH_DSHA256_0080b_single, 1500 * 1000); +BENCHMARK(HASH_DSHA256_0128b_single, 1200 * 1000); +BENCHMARK(HASH_DSHA256_0512b_single, 500 * 1000); +BENCHMARK(HASH_DSHA256_1024b_single, 300 * 1000); +BENCHMARK(HASH_DSHA256_2048b_single, 150 * 1000); +BENCHMARK(HASH_X11_0032b_single, 70 * 1000); +BENCHMARK(HASH_X11_0080b_single, 65 * 1000); +BENCHMARK(HASH_X11_0128b_single, 60 * 1000); +BENCHMARK(HASH_X11_0512b_single, 50 * 1000); +BENCHMARK(HASH_X11_1024b_single, 50 * 1000); +BENCHMARK(HASH_X11_2048b_single, 50 * 1000); +BENCHMARK(FastRandom_32bit, 110 * 1000 * 1000); +BENCHMARK(FastRandom_1bit, 440 * 1000 * 1000); diff --git a/src/bench/ecdsa.cpp b/src/bench/ecdsa.cpp index 65ea6b1284..2f15541044 100644 --- a/src/bench/ecdsa.cpp +++ b/src/bench/ecdsa.cpp @@ -72,6 +72,6 @@ static void ECDSAVerify_LargeBlock(benchmark::State& state) } } -BENCHMARK(ECDSASign) -BENCHMARK(ECDSAVerify) -BENCHMARK(ECDSAVerify_LargeBlock) +BENCHMARK(ECDSASign, 22 * 1000) +BENCHMARK(ECDSAVerify, 15 * 1000) +BENCHMARK(ECDSAVerify_LargeBlock, 15) diff --git a/src/bench/examples.cpp b/src/bench/examples.cpp index 5ad431c9d6..718dd6064a 100644 --- a/src/bench/examples.cpp +++ b/src/bench/examples.cpp @@ -15,7 +15,7 @@ static void Sleep100ms(benchmark::State& state) } } -BENCHMARK(Sleep100ms); +BENCHMARK(Sleep100ms, 10); // Extremely fast-running benchmark: #include @@ -31,4 +31,4 @@ static void 
Trig(benchmark::State& state) } } -BENCHMARK(Trig); +BENCHMARK(Trig, 12 * 1000 * 1000); diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp index f328b69ca3..6f4e384e3b 100644 --- a/src/bench/gcs_filter.cpp +++ b/src/bench/gcs_filter.cpp @@ -39,5 +39,5 @@ static void MatchGCSFilter(benchmark::State& state) } } -BENCHMARK(ConstructGCSFilter/*, 1000*/); -BENCHMARK(MatchGCSFilter/*, 50 * 1000*/); +BENCHMARK(ConstructGCSFilter, 1000); +BENCHMARK(MatchGCSFilter, 50 * 1000); diff --git a/src/bench/lockedpool.cpp b/src/bench/lockedpool.cpp index b0bfa95144..914e37a2ed 100644 --- a/src/bench/lockedpool.cpp +++ b/src/bench/lockedpool.cpp @@ -43,5 +43,4 @@ static void BenchLockedPool(benchmark::State& state) addr.clear(); } -BENCHMARK(BenchLockedPool); - +BENCHMARK(BenchLockedPool, 530); diff --git a/src/bench/mempool_eviction.cpp b/src/bench/mempool_eviction.cpp index 313d14ebc2..b301fa9191 100644 --- a/src/bench/mempool_eviction.cpp +++ b/src/bench/mempool_eviction.cpp @@ -111,4 +111,4 @@ static void MempoolEviction(benchmark::State& state) } } -BENCHMARK(MempoolEviction); +BENCHMARK(MempoolEviction, 41000); diff --git a/src/bench/merkle_root.cpp b/src/bench/merkle_root.cpp index 5ced641c06..fab12da311 100644 --- a/src/bench/merkle_root.cpp +++ b/src/bench/merkle_root.cpp @@ -23,4 +23,4 @@ static void MerkleRoot(benchmark::State& state) } } -BENCHMARK(MerkleRoot/*, 800*/); +BENCHMARK(MerkleRoot, 800); diff --git a/src/bench/poly1305.cpp b/src/bench/poly1305.cpp index 5701b9c6a4..12b84c907d 100644 --- a/src/bench/poly1305.cpp +++ b/src/bench/poly1305.cpp @@ -37,7 +37,6 @@ static void POLY1305_1MB(benchmark::State& state) POLY1305(state, BUFFER_SIZE_LARGE); } -//TODO add back below once benchmarking backports are done -BENCHMARK(POLY1305_64BYTES/*, 500000*/); -BENCHMARK(POLY1305_256BYTES/*, 250000*/); -BENCHMARK(POLY1305_1MB/*, 340*/); +BENCHMARK(POLY1305_64BYTES, 500000); +BENCHMARK(POLY1305_256BYTES, 250000); +BENCHMARK(POLY1305_1MB, 340); diff --git 
a/src/bench/prevector.cpp b/src/bench/prevector.cpp index 7ae23ed307..286669bfad 100644 --- a/src/bench/prevector.cpp +++ b/src/bench/prevector.cpp @@ -22,43 +22,36 @@ template static void PrevectorDestructor(benchmark::State& state) { while (state.KeepRunning()) { - for (auto x = 0; x < 1000; ++x) { - prevector<28, T> t0; - prevector<28, T> t1; - t0.resize(28); - t1.resize(29); - } + prevector<28, T> t0; + prevector<28, T> t1; + t0.resize(28); + t1.resize(29); } } template static void PrevectorClear(benchmark::State& state) { - + prevector<28, T> t0; + prevector<28, T> t1; while (state.KeepRunning()) { - for (auto x = 0; x < 1000; ++x) { - prevector<28, T> t0; - prevector<28, T> t1; - t0.resize(28); - t0.clear(); - t1.resize(29); - t1.clear(); - } + t0.resize(28); + t0.clear(); + t1.resize(29); + t1.clear(); } } template void PrevectorResize(benchmark::State& state) { + prevector<28, T> t0; + prevector<28, T> t1; while (state.KeepRunning()) { - prevector<28, T> t0; - prevector<28, T> t1; - for (auto x = 0; x < 1000; ++x) { - t0.resize(28); - t0.resize(0); - t1.resize(29); - t1.resize(0); - } + t0.resize(28); + t0.resize(0); + t1.resize(29); + t1.resize(0); } } @@ -66,15 +59,15 @@ void PrevectorResize(benchmark::State& state) static void Prevector ## name ## Nontrivial(benchmark::State& state) { \ Prevector ## name(state); \ } \ - BENCHMARK(Prevector ## name ## Nontrivial/*, nontrivops*/); \ + BENCHMARK(Prevector ## name ## Nontrivial, nontrivops); \ static void Prevector ## name ## Trivial(benchmark::State& state) { \ Prevector ## name(state); \ } \ - BENCHMARK(Prevector ## name ## Trivial/*, trivops*/); + BENCHMARK(Prevector ## name ## Trivial, trivops); -PREVECTOR_TEST(Clear, 28300, 88600) -PREVECTOR_TEST(Destructor, 28800, 88900) -PREVECTOR_TEST(Resize, 28900, 90300) +PREVECTOR_TEST(Clear, 80 * 1000 * 1000, 70 * 1000 * 1000) +PREVECTOR_TEST(Destructor, 800 * 1000 * 1000, 800 * 1000 * 1000) +PREVECTOR_TEST(Resize, 80 * 1000 * 1000, 70 * 1000 * 1000) #include @@ 
-86,11 +79,9 @@ static void PrevectorAssign(benchmark::State& state) t.resize(28); std::vector v; while (state.KeepRunning()) { - for (int i = 0; i < 1000; ++i) { - prevec::const_iterator b = t.begin() + 5; - prevec::const_iterator e = b + 20; - v.assign(b, e); - } + prevec::const_iterator b = t.begin() + 5; + prevec::const_iterator e = b + 20; + v.assign(b, e); } } @@ -100,13 +91,11 @@ static void PrevectorAssignTo(benchmark::State& state) t.resize(28); std::vector v; while (state.KeepRunning()) { - for (int i = 0; i < 1000; ++i) { - prevec::const_iterator b = t.begin() + 5; - prevec::const_iterator e = b + 20; - t.assign_to(b, e, v); - } + prevec::const_iterator b = t.begin() + 5; + prevec::const_iterator e = b + 20; + t.assign_to(b, e, v); } } -BENCHMARK(PrevectorAssign) -BENCHMARK(PrevectorAssignTo) +BENCHMARK(PrevectorAssign, 90 * 1000 * 1000) +BENCHMARK(PrevectorAssignTo, 700 * 1000 * 1000) diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp index 452099b800..031355c06e 100644 --- a/src/bench/rollingbloom.cpp +++ b/src/bench/rollingbloom.cpp @@ -12,8 +12,6 @@ static void RollingBloom(benchmark::State& state) CRollingBloomFilter filter(120000, 0.000001); std::vector data(32); uint32_t count = 0; - uint32_t nEntriesPerGeneration = (120000 + 1) / 2; - uint32_t countnow = 0; uint64_t match = 0; while (state.KeepRunning()) { count++; @@ -21,16 +19,8 @@ static void RollingBloom(benchmark::State& state) data[1] = count >> 8; data[2] = count >> 16; data[3] = count >> 24; - if (countnow == nEntriesPerGeneration) { - auto b = benchmark::clock::now(); - filter.insert(data); - auto total = std::chrono::duration_cast(benchmark::clock::now() - b).count(); - std::cout << "RollingBloom-refresh,1," << total << "," << total << "," << total << "\n"; - countnow = 0; - } else { - filter.insert(data); - } - countnow++; + filter.insert(data); + data[0] = count >> 24; data[1] = count >> 16; data[2] = count >> 8; @@ -39,4 +29,4 @@ static void 
RollingBloom(benchmark::State& state) } } -BENCHMARK(RollingBloom); +BENCHMARK(RollingBloom, 1500 * 1000); diff --git a/src/bench/string_cast.cpp b/src/bench/string_cast.cpp index 7291cdf6d0..9aa0445884 100644 --- a/src/bench/string_cast.cpp +++ b/src/bench/string_cast.cpp @@ -105,14 +105,14 @@ static void strings_2_strptintf(benchmark::State& state) } } -BENCHMARK(int_atoi); -BENCHMARK(int_lexical_cast); -BENCHMARK(strings_1_itostr); -BENCHMARK(strings_1_lexical_cast); -BENCHMARK(strings_1_numberToString); -BENCHMARK(strings_1_to_string); -BENCHMARK(strings_2_multi_itostr); -BENCHMARK(strings_2_multi_lexical_cast); -BENCHMARK(strings_2_multi_numberToString); -BENCHMARK(strings_2_multi_to_string); -BENCHMARK(strings_2_strptintf); +BENCHMARK(int_atoi, 700 * 1000 * 1000); +BENCHMARK(int_lexical_cast, 40 * 1000 * 1000); +BENCHMARK(strings_1_itostr, 3 * 1000 * 1000); +BENCHMARK(strings_1_lexical_cast, 20 * 1000 * 1000); +BENCHMARK(strings_1_numberToString, 3 * 1000 * 1000); +BENCHMARK(strings_1_to_string, 8 * 1000 * 1000); +BENCHMARK(strings_2_multi_itostr, 500 * 1000); +BENCHMARK(strings_2_multi_lexical_cast, 2500 * 1000); +BENCHMARK(strings_2_multi_numberToString, 600 * 1000); +BENCHMARK(strings_2_multi_to_string, 1500 * 1000); +BENCHMARK(strings_2_strptintf, 700 * 1000); diff --git a/src/bench/util_time.cpp b/src/bench/util_time.cpp index 6900ff3f33..94cef29ac9 100644 --- a/src/bench/util_time.cpp +++ b/src/bench/util_time.cpp @@ -36,7 +36,7 @@ static void BenchTimeMillisSys(benchmark::State& state) } } -BENCHMARK(BenchTimeDeprecated/*, 100000000*/); -BENCHMARK(BenchTimeMillis/*, 6000000*/); -BENCHMARK(BenchTimeMillisSys/*, 6000000*/); -BENCHMARK(BenchTimeMock/*, 300000000*/); +BENCHMARK(BenchTimeDeprecated, 100000000); +BENCHMARK(BenchTimeMillis, 6000000); +BENCHMARK(BenchTimeMillisSys, 6000000); +BENCHMARK(BenchTimeMock, 300000000);