From ba4696718eb3913bdab814d789b98d6c7f6e1f10 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kwvg@users.noreply.github.com> Date: Fri, 17 Sep 2021 11:58:57 +0200 Subject: [PATCH] partial bitcoin#23025: update nanobench add `-min_time` includes: - eed99cf272426e5957bee35dc8e7d0798aec8ec0 - 153e6860e84df0a3d52e5a3b2fe9c37b5e0b029a --- src/bench/addrman.cpp | 37 +++++++++++++++---------------------- src/bench/nanobench.h | 43 ++++++++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/bench/addrman.cpp b/src/bench/addrman.cpp index 535dad15b3..b2c98df11b 100644 --- a/src/bench/addrman.cpp +++ b/src/bench/addrman.cpp @@ -101,40 +101,33 @@ static void AddrManGetAddr(benchmark::Bench& bench) }); } -static void AddrManGood(benchmark::Bench& bench) +static void AddrManAddThenGood(benchmark::Bench& bench) { - /* Create many AddrMan objects - one to be modified at each loop iteration. - * This is necessary because the AddrMan::Good() method modifies the - * object, affecting the timing of subsequent calls to the same method and - * we want to do the same amount of work in every loop iteration. */ - - bench.epochs(5).epochIterations(1); - const size_t addrman_count{bench.epochs() * bench.epochIterations()}; - - std::vector> addrmans(addrman_count); - for (size_t i{0}; i < addrman_count; ++i) { - addrmans[i] = std::make_unique(/* asmap */ std::vector(), /* deterministic */ false, /* consistency_check_ratio */ 0); - FillAddrMan(*addrmans[i]); - } - auto markSomeAsGood = [](AddrMan& addrman) { for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) { for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) { - if (addr_i % 32 == 0) { - addrman.Good(g_addresses[source_i][addr_i]); - } + addrman.Good(g_addresses[source_i][addr_i]); } } }; - uint64_t i = 0; + CreateAddresses(); + bench.run([&] { - markSomeAsGood(*addrmans.at(i)); - ++i; + // To make the benchmark independent of the number of evaluations, we always prepare a new addrman. + // This is necessary because AddrMan::Good() method modifies the object, affecting the timing of subsequent calls + // to the same method and we want to do the same amount of work in every loop iteration. + // + // This has some overhead (exactly the result of AddrManAdd benchmark), but that overhead is constant so improvements in + // AddrMan::Good() will still be noticeable. + AddrMan addrman(/* asmap */ std::vector(), /* deterministic */ false, /* consistency_check_ratio */ 0); + AddAddressesToAddrMan(addrman); + + markSomeAsGood(addrman); }); } BENCHMARK(AddrManAdd); BENCHMARK(AddrManSelect); BENCHMARK(AddrManGetAddr); -BENCHMARK(AddrManGood); +BENCHMARK(AddrManAddThenGood); diff --git a/src/bench/nanobench.h b/src/bench/nanobench.h index 030d6ebf6a..27df08fb69 100644 --- a/src/bench/nanobench.h +++ b/src/bench/nanobench.h @@ -33,7 +33,7 @@ // see https://semver.org/ #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes -#define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes +#define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes /////////////////////////////////////////////////////////////////////////////////////////////////// // public facing api - as minimal as possible @@ -88,13 +88,15 @@ } while (0) #endif -#if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \ - !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS) -// only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in -// kernel 2.6.32 (all others are). -# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1 -#else -# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0 +#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0 +#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS) +# include +# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) +// PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3 +// PERF_FLAG_FD_CLOEXEC since kernel 3.14 +# undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS +# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1 +# endif #endif #if defined(__clang__) @@ -2210,20 +2212,20 @@ struct IterationLogic::Impl { columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0); double rInsMedian = -1.0; - if (mResult.has(Result::Measure::instructions)) { + if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) { rInsMedian = mResult.median(Result::Measure::instructions); columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch()); } double rCycMedian = -1.0; - if (mResult.has(Result::Measure::cpucycles)) { + if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) { rCycMedian = mResult.median(Result::Measure::cpucycles); columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch()); } if (rInsMedian > 0.0 && rCycMedian > 0.0) { columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian); } - if (mResult.has(Result::Measure::branchinstructions)) { + if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) { double rBraMedian = mResult.median(Result::Measure::branchinstructions); columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch()); if (mResult.has(Result::Measure::branchmisses)) { @@ -2402,6 +2404,14 @@ public: return (a + divisor / 2) / divisor; } + ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined") + static inline uint32_t mix(uint32_t x) noexcept { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return x; + } + template ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined") void calibrate(Op&& op) { @@ -2441,15 +2451,10 @@ public: uint64_t const numIters = 100000U + (std::random_device{}() & 3); uint64_t n = numIters; uint32_t x = 1234567; - auto fn = [&]() { - x ^= x << 13; - x ^= x >> 17; - x ^= x << 5; - }; beginMeasure(); while (n-- > 0) { - fn(); + x = mix(x); } endMeasure(); detail::doNotOptimizeAway(x); @@ -2459,8 +2464,8 @@ public: beginMeasure(); while (n-- > 0) { // we now run *twice* so we can easily calculate the overhead - fn(); - fn(); + x = mix(x); + x = mix(x); } endMeasure(); detail::doNotOptimizeAway(x);