From f3e5ca703cc62bb0e1e4299dd67e7045e0e7aebd Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kittywhiskers@users.noreply.github.com> Date: Tue, 14 Jan 2020 11:31:36 -0800 Subject: [PATCH 1/2] bitcoin#17925: Improve UpdateTransactionsFromBlock with Epochs --- src/txmempool.cpp | 48 ++++++++++++++++++++++++++++++------------- src/txmempool.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 14 deletions(-) diff --git a/src/txmempool.cpp b/src/txmempool.cpp index 5decd7b2ad..4ff75e3b1d 100644 --- a/src/txmempool.cpp +++ b/src/txmempool.cpp @@ -29,7 +29,7 @@ CTxMemPoolEntry::CTxMemPoolEntry(const CTransactionRef& _tx, const CAmount& _nFe int64_t _nTime, unsigned int _entryHeight, bool _spendsCoinbase, unsigned int _sigOps, LockPoints lp): tx(_tx), nFee(_nFee), nTime(_nTime), entryHeight(_entryHeight), - spendsCoinbase(_spendsCoinbase), sigOpCount(_sigOps), lockPoints(lp) + spendsCoinbase(_spendsCoinbase), sigOpCount(_sigOps), lockPoints(lp), m_epoch(0) { nTxSize = ::GetSerializeSize(*_tx, SER_NETWORK, PROTOCOL_VERSION); nUsageSize = RecursiveDynamicUsage(tx); @@ -126,8 +126,6 @@ void CTxMemPool::UpdateTransactionsFromBlock(const std::vector &vHashes // setMemPoolChildren will be updated, an assumption made in // UpdateForDescendants. for (const uint256 &hash : reverse_iterate(vHashesToUpdate)) { - // we cache the in-mempool children to avoid duplicate updates - setEntries setChildren; // calculate children from mapNextTx txiter it = mapTx.find(hash); if (it == mapTx.end()) { @@ -136,17 +134,21 @@ void CTxMemPool::UpdateTransactionsFromBlock(const std::vector &vHashes auto iter = mapNextTx.lower_bound(COutPoint(hash, 0)); // First calculate the children, and update setMemPoolChildren to // include them, and update their setMemPoolParents to include this tx. - for (; iter != mapNextTx.end() && iter->first->hash == hash; ++iter) { - const uint256 &childHash = iter->second->GetHash(); - txiter childIter = mapTx.find(childHash); - assert(childIter != mapTx.end()); - // We can skip updating entries we've encountered before or that - // are in the block (which are already accounted for). - if (setChildren.insert(childIter).second && !setAlreadyIncluded.count(childHash)) { - UpdateChild(it, childIter, true); - UpdateParent(childIter, it, true); + // we cache the in-mempool children to avoid duplicate updates + { + const auto epoch = GetFreshEpoch(); + for (; iter != mapNextTx.end() && iter->first->hash == hash; ++iter) { + const uint256 &childHash = iter->second->GetHash(); + txiter childIter = mapTx.find(childHash); + assert(childIter != mapTx.end()); + // We can skip updating entries we've encountered before or that + // are in the block (which are already accounted for). + if (!visited(childIter) && !setAlreadyIncluded.count(childHash)) { + UpdateChild(it, childIter, true); + UpdateParent(childIter, it, true); + } } - } + } // release epoch guard for UpdateForDescendants UpdateForDescendants(it, mapMemPoolDescendantsToUpdate, setAlreadyIncluded); } } @@ -329,7 +331,7 @@ void CTxMemPoolEntry::UpdateAncestorState(int64_t modifySize, CAmount modifyFee, } CTxMemPool::CTxMemPool(CBlockPolicyEstimator* estimator) : - nTransactionsUpdated(0), minerPolicyEstimator(estimator) + nTransactionsUpdated(0), minerPolicyEstimator(estimator), m_epoch(0), m_has_epoch_guard(false) { _clear(); //lock free clear @@ -1591,4 +1593,22 @@ void CTxMemPool::GetTransactionAncestry(const uint256& txid, size_t& ancestors, } } +CTxMemPool::EpochGuard CTxMemPool::GetFreshEpoch() const +{ + return EpochGuard(*this); +} +CTxMemPool::EpochGuard::EpochGuard(const CTxMemPool& in) : pool(in) +{ + assert(!pool.m_has_epoch_guard); + ++pool.m_epoch; + pool.m_has_epoch_guard = true; +} + +CTxMemPool::EpochGuard::~EpochGuard() +{ + // prevents stale results being used + ++pool.m_epoch; + pool.m_has_epoch_guard = false; +} + SaltedTxidHasher::SaltedTxidHasher() : k0(GetRand(std::numeric_limits::max())), k1(GetRand(std::numeric_limits::max())) {} diff --git a/src/txmempool.h b/src/txmempool.h index 44ab0b4b71..c6c0883ffc 100644 --- a/src/txmempool.h +++ b/src/txmempool.h @@ -136,6 +136,7 @@ public: // If this is a proTx, this will be the hash of the key for which this ProTx was valid mutable uint256 validForProTxKey; mutable bool isKeyChangeProTx{false}; + mutable uint64_t m_epoch; //!< epoch when last touched, useful for graph algorithms }; // Helpers for modifying CTxMemPool::mapTx, which is a boost multi_index. @@ -456,6 +457,8 @@ private: mutable int64_t lastRollingFeeUpdate; mutable bool blockSinceLastRollingFeeBump; mutable double rollingMinimumFeeRate; //!< minimum fee to get into the pool, decreases exponentially + mutable uint64_t m_epoch; + mutable bool m_has_epoch_guard; void trackPackageRemoved(const CFeeRate& rate) EXCLUSIVE_LOCKS_REQUIRED(cs); @@ -731,6 +734,55 @@ private: * removal. */ void removeUnchecked(txiter entry, MemPoolRemovalReason reason = MemPoolRemovalReason::UNKNOWN) EXCLUSIVE_LOCKS_REQUIRED(cs); +public: + /** EpochGuard: RAII-style guard for using epoch-based graph traversal algorithms. + * When walking ancestors or descendants, we generally want to avoid + * visiting the same transactions twice. Some traversal algorithms use + * std::set (or setEntries) to deduplicate the transaction we visit. + * However, use of std::set is algorithmically undesirable because it both + * adds an asymptotic factor of O(log n) to traverals cost and triggers O(n) + * more dynamic memory allocations. + * In many algorithms we can replace std::set with an internal mempool + * counter to track the time (or, "epoch") that we began a traversal, and + * check + update a per-transaction epoch for each transaction we look at to + * determine if that transaction has not yet been visited during the current + * traversal's epoch. + * Algorithms using std::set can be replaced on a one by one basis. + * Both techniques are not fundamentally incomaptible across the codebase. + * Generally speaking, however, the remaining use of std::set for mempool + * traversal should be viewed as a TODO for replacement with an epoch based + * traversal, rather than a preference for std::set over epochs in that + * algorithm. + */ + class EpochGuard { + const CTxMemPool& pool; + public: + EpochGuard(const CTxMemPool& in); + ~EpochGuard(); + }; + // N.B. GetFreshEpoch modifies mutable state via the EpochGuard construction + // (and later destruction) + EpochGuard GetFreshEpoch() const EXCLUSIVE_LOCKS_REQUIRED(cs); + + /** visited marks a CTxMemPoolEntry as having been traversed + * during the lifetime of the most recently created EpochGuard + * and returns false if we are the first visitor, true otherwise. + * + * An EpochGuard must be held when visited is called or an assert will be + * triggered. + * + */ + bool visited(txiter it) const EXCLUSIVE_LOCKS_REQUIRED(cs) { + assert(m_has_epoch_guard); + bool ret = it->m_epoch >= m_epoch; + it->m_epoch = std::max(it->m_epoch, m_epoch); + return ret; + } + + bool visited(boost::optional it) const EXCLUSIVE_LOCKS_REQUIRED(cs) { + assert(m_has_epoch_guard); + return !it || visited(*it); + } }; /** From 0206ca2dcb3f1292f35ba8abfa1d534c1afcc321 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kittywhiskers@users.noreply.github.com> Date: Thu, 19 Sep 2019 13:59:49 -0400 Subject: [PATCH 2/2] bitcoin#16805: add timing information to FlushStateToDisk() --- src/Makefile.am | 1 + src/Makefile.test.include | 1 + src/logging/timer.h | 104 +++++++++++++++++++++++++++++++++++++ src/test/logging_tests.cpp | 36 +++++++++++++ src/validation.cpp | 28 +++++++++- 5 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 src/logging/timer.h create mode 100644 src/test/logging_tests.cpp diff --git a/src/Makefile.am b/src/Makefile.am index f2f6ddc879..7c9aba61eb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -199,6 +199,7 @@ BITCOIN_CORE_H = \ llmq/quorums_signing_shares.h \ llmq/quorums_utils.h \ logging.h \ + logging/timer.h \ masternode/activemasternode.h \ masternode/masternode-meta.h \ masternode/masternode-payments.h \ diff --git a/src/Makefile.test.include b/src/Makefile.test.include index 4d3c6f4d6f..3b1759af0b 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -69,6 +69,7 @@ BITCOIN_TESTS =\ test/key_tests.cpp \ test/lcg.h \ test/limitedmap_tests.cpp \ + test/logging_tests.cpp \ test/dbwrapper_tests.cpp \ test/main_tests.cpp \ test/mempool_tests.cpp \ diff --git a/src/logging/timer.h b/src/logging/timer.h new file mode 100644 index 0000000000..ca34aa08ac --- /dev/null +++ b/src/logging/timer.h @@ -0,0 +1,104 @@ +// Copyright (c) 2009-2010 Satoshi Nakamoto +// Copyright (c) 2009-2018 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_LOGGING_TIMER_H +#define BITCOIN_LOGGING_TIMER_H + +#include +#include +#include + +#include +#include + + +namespace BCLog { + +//! RAII-style object that outputs timing information to logs. +template +class Timer +{ +public: + //! If log_category is left as the default, end_msg will log unconditionally + //! (instead of being filtered by category). + Timer( + std::string prefix, + std::string end_msg, + BCLog::LogFlags log_category = BCLog::LogFlags::ALL) : + m_prefix(std::move(prefix)), + m_title(std::move(end_msg)), + m_log_category(log_category) + { + this->Log(strprintf("%s started", m_title)); + m_start_t = GetTime(); + } + + ~Timer() + { + this->Log(strprintf("%s completed", m_title)); + } + + void Log(const std::string& msg) + { + const std::string full_msg = this->LogMsg(msg); + + if (m_log_category == BCLog::LogFlags::ALL) { + LogPrintf("%s\n", full_msg); + } else { + LogPrint(m_log_category, "%s\n", full_msg); + } + } + + std::string LogMsg(const std::string& msg) + { + const auto end_time = GetTime() - m_start_t; + if (m_start_t.count() <= 0) { + return strprintf("%s: %s", m_prefix, msg); + } + + std::string units = ""; + float divisor = 1; + + if (std::is_same::value) { + units = "μs"; + } else if (std::is_same::value) { + units = "ms"; + divisor = 1000.; + } else if (std::is_same::value) { + units = "s"; + divisor = 1000. * 1000.; + } + + const float time_ms = end_time.count() / divisor; + return strprintf("%s: %s (%.2f%s)", m_prefix, msg, time_ms, units); + } + +private: + std::chrono::microseconds m_start_t{}; + + //! Log prefix; usually the name of the function this was created in. + const std::string m_prefix{}; + + //! A descriptive message of what is being timed. + const std::string m_title{}; + + //! Forwarded on to LogPrint if specified - has the effect of only + //! outputing the timing log when a particular debug= category is specified. + const BCLog::LogFlags m_log_category{}; + +}; + +} // namespace BCLog + + +#define LOG_TIME_MICROS(end_msg, ...) \ + BCLog::Timer PASTE2(logging_timer, __COUNTER__)(__func__, end_msg, ## __VA_ARGS__) +#define LOG_TIME_MILLIS(end_msg, ...) \ + BCLog::Timer PASTE2(logging_timer, __COUNTER__)(__func__, end_msg, ## __VA_ARGS__) +#define LOG_TIME_SECONDS(end_msg, ...) \ + BCLog::Timer PASTE2(logging_timer, __COUNTER__)(__func__, end_msg, ## __VA_ARGS__) + + +#endif // BITCOIN_LOGGING_TIMER_H diff --git a/src/test/logging_tests.cpp b/src/test/logging_tests.cpp new file mode 100644 index 0000000000..8ed9d3aa98 --- /dev/null +++ b/src/test/logging_tests.cpp @@ -0,0 +1,36 @@ +// Copyright (c) 2019 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +#include + +#include + +BOOST_FIXTURE_TEST_SUITE(logging_tests, BasicTestingSetup) + +BOOST_AUTO_TEST_CASE(logging_timer) +{ + + SetMockTime(1); + auto sec_timer = BCLog::Timer("tests", "end_msg"); + SetMockTime(2); + BOOST_CHECK_EQUAL(sec_timer.LogMsg("test secs"), "tests: test secs (1.00s)"); + + SetMockTime(1); + auto ms_timer = BCLog::Timer("tests", "end_msg"); + SetMockTime(2); + BOOST_CHECK_EQUAL(ms_timer.LogMsg("test ms"), "tests: test ms (1000.00ms)"); + + SetMockTime(1); + auto micro_timer = BCLog::Timer("tests", "end_msg"); + SetMockTime(2); + BOOST_CHECK_EQUAL(micro_timer.LogMsg("test micros"), "tests: test micros (1000000.00μs)"); + + SetMockTime(0); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index df3ffbf83e..e387f05972 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -2464,6 +2466,10 @@ bool static FlushStateToDisk(const CChainParams& chainparams, CValidationState & static int64_t nLastFlush = 0; std::set setFilesToPrune; bool full_flush_completed = false; + + const size_t coins_count = pcoinsTip->GetCacheSize(); + const size_t coins_mem_usage = pcoinsTip->DynamicMemoryUsage(); + try { { bool fFlushForPrune = false; @@ -2471,8 +2477,12 @@ bool static FlushStateToDisk(const CChainParams& chainparams, CValidationState & LOCK(cs_LastBlockFile); if (fPruneMode && (fCheckForPruning || nManualPruneHeight > 0) && !fReindex) { if (nManualPruneHeight > 0) { + LOG_TIME_MILLIS("find files to prune (manual)", BCLog::BENCHMARK); + FindFilesToPruneManual(setFilesToPrune, nManualPruneHeight); } else { + LOG_TIME_MILLIS("find files to prune", BCLog::BENCHMARK); + FindFilesToPrune(setFilesToPrune, chainparams.PruneAfterHeight()); fCheckForPruning = false; } @@ -2512,9 +2522,17 @@ bool static FlushStateToDisk(const CChainParams& chainparams, CValidationState & if (!CheckDiskSpace(0, true)) return state.Error("out of disk space"); // First make sure all block and undo data is flushed to disk. - FlushBlockFile(); + { + LOG_TIME_MILLIS("write block and undo data to disk", BCLog::BENCHMARK); + + // First make sure all block and undo data is flushed to disk. + FlushBlockFile(); + } + // Then update all block file information (which may refer to block and undo files). { + LOG_TIME_MILLIS("write block index to disk", BCLog::BENCHMARK); + std::vector > vFiles; vFiles.reserve(setDirtyFileInfo.size()); for (std::set::iterator it = setDirtyFileInfo.begin(); it != setDirtyFileInfo.end(); ) { @@ -2532,12 +2550,18 @@ bool static FlushStateToDisk(const CChainParams& chainparams, CValidationState & } } // Finally remove any pruned files - if (fFlushForPrune) + if (fFlushForPrune) { + LOG_TIME_MILLIS("unlink pruned files", BCLog::BENCHMARK); + UnlinkPrunedFiles(setFilesToPrune); + } nLastWrite = nNow; } // Flush best chain related state. This can only be done if the blocks / block index write was also done. if (fDoFullFlush && !pcoinsTip->GetBestBlock().IsNull()) { + LOG_TIME_SECONDS(strprintf("write coins cache to disk (%d coins, %.2fkB)", + coins_count, coins_mem_usage / 1000)); + // Typical Coin structures on disk are around 48 bytes in size. // Pushing a new one to the database can cause it to be written // twice (once in the log, and once in the tables). This is already