Merge bitcoin/bitcoin#28877: bench: Update nanobench to 4.3.11

fe434a469534766f18d7560d968deed37193835f bench: Update nanobench to 4.3.11 (TheCharlatan)

Pull request description:

  The newest version fixes the false-positive `* Turbo is enabled, CPU frequency will fluctuate` warning on AMD CPUs. The file was taken directly from the release page: https://github.com/martinus/nanobench/releases/tag/v4.3.11.

  Other changes from the release notes:

  * Check for failures in parseFile(), perf events tweaks by tommi-cujo in https://github.com/martinus/nanobench/pull/84
  * Workaround missing noexcept for std::string move assignment by tommi-cujo in https://github.com/martinus/nanobench/pull/87
  * removed the link by martinus in https://github.com/martinus/nanobench/pull/89
  * Lots of minor cleanups by martinus in https://github.com/martinus/nanobench/pull/85
  * Add linter for version & clang-format. Updated version by martinus in https://github.com/martinus/nanobench/pull/90

ACKs for top commit:
  fanquake:
    ACK fe434a469534766f18d7560d968deed37193835f - have not tested.

Tree-SHA512: a8f15e1db1d993673e4b295a3bab22e67ee3c9f3c0bcbef28974fe9ff37dbb741967a526088d5b148c8d25c9d57cd3b844238100c17b23038638787461805678
This commit is contained in:
fanquake 2023-11-16 09:45:25 +00:00 committed by pasta
parent 417c86b949
commit 168e5e4a50
No known key found for this signature in database
GPG Key ID: E2F3D7916E722D38

View File

@ -33,7 +33,7 @@
// see https://semver.org/ // see https://semver.org/
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
#define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes #define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// public facing api - as minimal as possible // public facing api - as minimal as possible
@ -120,6 +120,10 @@
# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
#endif #endif
// noexcept may be missing for std::string.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265
#define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE() std::is_nothrow_move_assignable<std::string>::value
// declarations /////////////////////////////////////////////////////////////////////////////////// // declarations ///////////////////////////////////////////////////////////////////////////////////
namespace ankerl { namespace ankerl {
@ -404,7 +408,7 @@ struct Config {
Config(); Config();
~Config(); ~Config();
Config& operator=(Config const& other); Config& operator=(Config const& other);
Config& operator=(Config&& other) noexcept; Config& operator=(Config&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
Config(Config const& other); Config(Config const& other);
Config(Config&& other) noexcept; Config(Config&& other) noexcept;
}; };
@ -430,7 +434,7 @@ public:
~Result(); ~Result();
Result& operator=(Result const& other); Result& operator=(Result const& other);
Result& operator=(Result&& other) noexcept; Result& operator=(Result&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
Result(Result const& other); Result(Result const& other);
Result(Result&& other) noexcept; Result(Result&& other) noexcept;
@ -596,7 +600,7 @@ public:
* *
* @return Vector containing the full state: * @return Vector containing the full state:
*/ */
std::vector<uint64_t> state() const; ANKERL_NANOBENCH(NODISCARD) std::vector<uint64_t> state() const;
private: private:
static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept; static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept;
@ -628,7 +632,7 @@ public:
Bench(); Bench();
Bench(Bench&& other) noexcept; Bench(Bench&& other) noexcept;
Bench& operator=(Bench&& other) noexcept; Bench& operator=(Bench&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
Bench(Bench const& other); Bench(Bench const& other);
Bench& operator=(Bench const& other); Bench& operator=(Bench const& other);
~Bench() noexcept; ~Bench() noexcept;
@ -818,7 +822,7 @@ public:
* Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see * Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
* that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations(). * that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations().
* *
* @see maxEpochTim), minEpochIterations * @see maxEpochTime, minEpochIterations
* *
* @param t Minimum time each epoch should take. * @param t Minimum time each epoch should take.
*/ */
@ -1030,7 +1034,7 @@ void doNotOptimizeAway(T const& val);
// These assembly magic is directly from what Google Benchmark is doing. I have previously used what facebook's folly was doing, but // These assembly magic is directly from what Google Benchmark is doing. I have previously used what facebook's folly was doing, but
// this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways. // this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways.
// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307 // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L443-L446
template <typename T> template <typename T>
void doNotOptimizeAway(T const& val) { void doNotOptimizeAway(T const& val) {
// NOLINTNEXTLINE(hicpp-no-assembler) // NOLINTNEXTLINE(hicpp-no-assembler)
@ -1781,7 +1785,7 @@ bool isEndlessRunning(std::string const& name);
bool isWarningsEnabled(); bool isWarningsEnabled();
template <typename T> template <typename T>
T parseFile(std::string const& filename); T parseFile(std::string const& filename, bool* fail);
void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations); void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
void printStabilityInformationOnce(std::ostream* outStream); void printStabilityInformationOnce(std::ostream* outStream);
@ -1839,7 +1843,7 @@ class Number {
public: public:
Number(int width, int precision, double value); Number(int width, int precision, double value);
Number(int width, int precision, int64_t value); Number(int width, int precision, int64_t value);
std::string to_s() const; ANKERL_NANOBENCH(NODISCARD) std::string to_s() const;
private: private:
friend std::ostream& operator<<(std::ostream& os, Number const& n); friend std::ostream& operator<<(std::ostream& os, Number const& n);
@ -1857,11 +1861,11 @@ std::ostream& operator<<(std::ostream& os, Number const& n);
class MarkDownColumn { class MarkDownColumn {
public: public:
MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val); MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept;
std::string title() const; ANKERL_NANOBENCH(NODISCARD) std::string title() const;
std::string separator() const; ANKERL_NANOBENCH(NODISCARD) std::string separator() const;
std::string invalid() const; ANKERL_NANOBENCH(NODISCARD) std::string invalid() const;
std::string value() const; ANKERL_NANOBENCH(NODISCARD) std::string value() const;
private: private:
int mWidth; int mWidth;
@ -1976,9 +1980,9 @@ PerformanceCounters& performanceCounters() {
} }
// Windows version of doNotOptimizeAway // Windows version of doNotOptimizeAway
// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307 // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L514
// see https://github.com/facebook/folly/blob/master/folly/Benchmark.h#L280 // see https://github.com/facebook/folly/blob/v2023.01.30.00/folly/lang/Hint-inl.h#L54-L58
// see https://docs.microsoft.com/en-us/cpp/preprocessor/optimize // see https://learn.microsoft.com/en-us/cpp/preprocessor/optimize
# if defined(_MSC_VER) # if defined(_MSC_VER)
# pragma optimize("", off) # pragma optimize("", off)
void doNotOptimizeAwaySink(void const*) {} void doNotOptimizeAwaySink(void const*) {}
@ -1986,10 +1990,13 @@ void doNotOptimizeAwaySink(void const*) {}
# endif # endif
template <typename T> template <typename T>
T parseFile(std::string const& filename) { T parseFile(std::string const& filename, bool* fail) {
std::ifstream fin(filename); // NOLINT(misc-const-correctness) std::ifstream fin(filename); // NOLINT(misc-const-correctness)
T num{}; T num{};
fin >> num; fin >> num;
if (fail != nullptr) {
*fail = fin.fail();
}
return num; return num;
} }
@ -2032,16 +2039,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
if (nprocs <= 0) { if (nprocs <= 0) {
warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible"); warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible");
} else { } else {
// check frequency scaling // check frequency scaling
for (long id = 0; id < nprocs; ++id) { for (long id = 0; id < nprocs; ++id) {
auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id)); auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id));
auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr; auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr;
auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq"); auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq", nullptr);
auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq"); auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq", nullptr);
if (minFreq != maxFreq) { if (minFreq != maxFreq) {
auto minMHz = static_cast<double>(minFreq) / 1000.0; auto minMHz = d(minFreq) / 1000.0;
auto maxMHz = static_cast<double>(maxFreq) / 1000.0; auto maxMHz = d(maxFreq) / 1000.0;
warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " + warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " +
detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() + detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() +
" MHz"); " MHz");
@ -2050,13 +2056,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
} }
} }
auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"); auto fail = false;
if ("performance" != currentGovernor) { auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", &fail);
if (!fail && "performance" != currentGovernor) {
warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'"); warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'");
recommendPyPerf = true; recommendPyPerf = true;
} }
if (0 == parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo")) { auto noTurbo = parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo", &fail);
if (!fail && noTurbo == 0) {
warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate"); warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate");
recommendPyPerf = true; recommendPyPerf = true;
} }
@ -2250,10 +2258,9 @@ struct IterationLogic::Impl {
mNumIters = 0; mNumIters = 0;
} }
ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, static_cast<double>(elapsed.count())) << " elapsed, " ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, d(elapsed.count())) << " elapsed, "
<< detail::fmt::Number(20, 3, static_cast<double>(mTargetRuntimePerEpoch.count())) << detail::fmt::Number(20, 3, d(mTargetRuntimePerEpoch.count())) << " target. oldIters="
<< " target. oldIters=" << oldIters << ", mNumIters=" << mNumIters << oldIters << ", mNumIters=" << mNumIters << ", mState=" << static_cast<int>(mState));
<< ", mState=" << static_cast<int>(mState));
} }
// NOLINTNEXTLINE(readability-function-cognitive-complexity) // NOLINTNEXTLINE(readability-function-cognitive-complexity)
@ -2357,7 +2364,7 @@ struct IterationLogic::Impl {
} }
os << fmt::MarkDownCode(mBench.name()); os << fmt::MarkDownCode(mBench.name());
if (showUnstable) { if (showUnstable) {
auto avgIters = static_cast<double>(mTotalNumIters) / static_cast<double>(mBench.epochs()); auto avgIters = d(mTotalNumIters) / d(mBench.epochs());
// NOLINTNEXTLINE(bugprone-incorrect-roundings) // NOLINTNEXTLINE(bugprone-incorrect-roundings)
auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5); auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5);
@ -2435,7 +2442,7 @@ public:
bool monitor(perf_sw_ids swId, Target target); bool monitor(perf_sw_ids swId, Target target);
bool monitor(perf_hw_id hwId, Target target); bool monitor(perf_hw_id hwId, Target target);
bool hasError() const noexcept { ANKERL_NANOBENCH(NODISCARD) bool hasError() const noexcept {
return mHasError; return mHasError;
} }
@ -2691,16 +2698,23 @@ PerformanceCounters::PerformanceCounters()
, mVal() , mVal()
, mHas() { , mHas() {
mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false)); // HW events
mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false)); mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
mHas.contextSwitches = if (!mHas.cpuCycles) {
mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false)); // Fallback to cycles counter, reference cycles not available in many systems.
mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
}
mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true)); mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true));
mHas.branchInstructions = mHas.branchInstructions =
mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false)); mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false));
mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false)); mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false));
// mHas.branchMisses = false; // mHas.branchMisses = false;
// SW events
mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false));
mHas.contextSwitches =
mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false));
mPc->start(); mPc->start();
mPc->calibrate([] { mPc->calibrate([] {
auto before = ankerl::nanobench::Clock::now(); auto before = ankerl::nanobench::Clock::now();
@ -2789,7 +2803,7 @@ void StreamStateRestorer::restore() {
Number::Number(int width, int precision, int64_t value) Number::Number(int width, int precision, int64_t value)
: mWidth(width) : mWidth(width)
, mPrecision(precision) , mPrecision(precision)
, mValue(static_cast<double>(value)) {} , mValue(d(value)) {}
Number::Number(int width, int precision, double value) Number::Number(int width, int precision, double value)
: mWidth(width) : mWidth(width)
@ -2823,7 +2837,7 @@ std::ostream& operator<<(std::ostream& os, Number const& n) {
return n.write(os); return n.write(os);
} }
MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept
: mWidth(w) : mWidth(w)
, mPrecision(prec) , mPrecision(prec)
, mTitle(std::move(tit)) , mTitle(std::move(tit))
@ -2884,14 +2898,14 @@ std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
Config::Config() = default; Config::Config() = default;
Config::~Config() = default; Config::~Config() = default;
Config& Config::operator=(Config const&) = default; Config& Config::operator=(Config const&) = default;
Config& Config::operator=(Config&&) noexcept = default; Config& Config::operator=(Config&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
Config::Config(Config const&) = default; Config::Config(Config const&) = default;
Config::Config(Config&&) noexcept = default; Config::Config(Config&&) noexcept = default;
// provide implementation here so it's only generated once // provide implementation here so it's only generated once
Result::~Result() = default; Result::~Result() = default;
Result& Result::operator=(Result const&) = default; Result& Result::operator=(Result const&) = default;
Result& Result::operator=(Result&&) noexcept = default; Result& Result::operator=(Result&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
Result::Result(Result const&) = default; Result::Result(Result const&) = default;
Result::Result(Result&&) noexcept = default; Result::Result(Result&&) noexcept = default;
@ -2992,7 +3006,7 @@ double Result::medianAbsolutePercentError(Measure m) const {
auto data = mNameToMeasurements[detail::u(m)]; auto data = mNameToMeasurements[detail::u(m)];
// calculates MdAPE which is the median of percentage error // calculates MdAPE which is the median of percentage error
// see https://www.spiderfinancial.com/support/documentation/numxl/reference-manual/forecasting-performance/mdape // see https://support.numxl.com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error
auto med = calcMedian(data); auto med = calcMedian(data);
// transform the data to absolute error // transform the data to absolute error
@ -3106,7 +3120,7 @@ Bench::Bench() {
} }
Bench::Bench(Bench&&) noexcept = default; Bench::Bench(Bench&&) noexcept = default;
Bench& Bench::operator=(Bench&&) noexcept = default; Bench& Bench::operator=(Bench&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
Bench::Bench(Bench const&) = default; Bench::Bench(Bench const&) = default;
Bench& Bench::operator=(Bench const&) = default; Bench& Bench::operator=(Bench const&) = default;
Bench::~Bench() noexcept = default; Bench::~Bench() noexcept = default;
@ -3423,7 +3437,7 @@ BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
sumMeasure += rm.second; sumMeasure += rm.second;
} }
auto n = static_cast<double>(rangeMeasure.size()); auto n = detail::d(rangeMeasure.size());
auto mean = sumMeasure / n; auto mean = sumMeasure / n;
mNormalizedRootMeanSquare = std::sqrt(err / n) / mean; mNormalizedRootMeanSquare = std::sqrt(err / n) / mean;
} }