From b3a1c8b9c67a87b865b270540ed111fc8c516f0a Mon Sep 17 00:00:00 2001 From: UdjinM6 Date: Fri, 22 Dec 2023 22:56:43 +0300 Subject: [PATCH] fix: ScanQuorums should not start cache population for outdated quorums (#5784) ## Issue being fixed or feature implemented Cache population for old quorums is a cpu heavy operation and should be avoided for inactive quorums _at least_ oin `ScanQuorums`. This issue is critical for testnet and other small network because every mn participate in almost every platform quorum and cache population for 2 months of quorums can easily block everything for 15+ minutes on a 4 cpu node. On mainnet quorum distribution is much better but it's still a small waste of cpu (or not so small for unlucky nodes). #5761 follow-up ## What was done? Do not start cache population for outdated quorums, improve logs in `StartCachePopulatorThread` to make it easier to see what's going on. ## How Has This Been Tested? run a mn on testnet ## Breaking Changes n/a ## Checklist: - [x] I have performed a self-review of my own code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have added or updated relevant unit/integration/functional/e2e tests - [ ] I have made corresponding changes to the documentation - [x] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_ --- src/llmq/quorums.cpp | 23 ++++++++++++++++------- src/llmq/quorums.h | 4 ++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/llmq/quorums.cpp b/src/llmq/quorums.cpp index 0d3969cdd3..e7d8073b64 100644 --- a/src/llmq/quorums.cpp +++ b/src/llmq/quorums.cpp @@ -362,7 +362,7 @@ void CQuorumManager::CheckQuorumConnections(const Consensus::LLMQParams& llmqPar } } -CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex) const +CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex, bool populate_cache) const { const uint256& quorumHash{pQuorumBaseBlockIndex->GetBlockHash()}; uint256 minedBlockHash; @@ -392,7 +392,7 @@ CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType l } } - if (hasValidVvec) { + if (hasValidVvec && populate_cache) { // pre-populate caches in the background // recovering public key shares is quite expensive and would result in serious lags for the first few signing // sessions if the shares would be calculated on-demand @@ -572,7 +572,9 @@ std::vector CQuorumManager::ScanQuorums(Consensus::LLMQType llmqTyp for (auto& pQuorumBaseBlockIndex : pQuorumBaseBlockIndexes) { assert(pQuorumBaseBlockIndex); - auto quorum = GetQuorum(llmqType, pQuorumBaseBlockIndex); + // populate cache for keepOldConnections most recent quorums only + bool populate_cache = vecResultQuorums.size() < llmq_params_opt->keepOldConnections; + auto quorum = GetQuorum(llmqType, pQuorumBaseBlockIndex, populate_cache); assert(quorum != nullptr); vecResultQuorums.emplace_back(quorum); } @@ -602,7 +604,7 @@ CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, const uint25 return GetQuorum(llmqType, pQuorumBaseBlockIndex); } -CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex) const +CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex, bool populate_cache) const { auto quorumHash = pQuorumBaseBlockIndex->GetBlockHash(); @@ -617,7 +619,7 @@ CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_nul return pQuorum; } - return BuildQuorumFromCommitment(llmqType, pQuorumBaseBlockIndex); + return BuildQuorumFromCommitment(llmqType, pQuorumBaseBlockIndex, populate_cache); } size_t CQuorumManager::GetQuorumRecoveryStartOffset(const CQuorumCPtr pQuorum, const CBlockIndex* pIndex) const @@ -850,7 +852,10 @@ void CQuorumManager::StartCachePopulatorThread(const CQuorumCPtr pQuorum) const } cxxtimer::Timer t(true); - LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- start\n"); + LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- type=%d height=%d hash=%s start\n", + ToUnderlying(pQuorum->params.type), + pQuorum->m_quorum_base_block_index->nHeight, + pQuorum->m_quorum_base_block_index->GetBlockHash().ToString()); // when then later some other thread tries to get keys, it will be much faster workerPool.push([pQuorum, t, this](int threadId) { @@ -862,7 +867,11 @@ void CQuorumManager::StartCachePopulatorThread(const CQuorumCPtr pQuorum) const pQuorum->GetPubKeyShare(i); } } - LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- done. time=%d\n", t.count()); + LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- type=%d height=%d hash=%s done. time=%d\n", + ToUnderlying(pQuorum->params.type), + pQuorum->m_quorum_base_block_index->nHeight, + pQuorum->m_quorum_base_block_index->GetBlockHash().ToString(), + t.count()); }); } diff --git a/src/llmq/quorums.h b/src/llmq/quorums.h index afb515a056..e80e876aee 100644 --- a/src/llmq/quorums.h +++ b/src/llmq/quorums.h @@ -267,10 +267,10 @@ private: // all private methods here are cs_main-free void CheckQuorumConnections(const Consensus::LLMQParams& llmqParams, const CBlockIndex *pindexNew) const; - CQuorumPtr BuildQuorumFromCommitment(Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex) const; + CQuorumPtr BuildQuorumFromCommitment(Consensus::LLMQType llmqType, gsl::not_null pQuorumBaseBlockIndex, bool populate_cache) const; bool BuildQuorumContributions(const CFinalCommitmentPtr& fqc, const std::shared_ptr& quorum) const; - CQuorumCPtr GetQuorum(Consensus::LLMQType llmqType, gsl::not_null pindex) const; + CQuorumCPtr GetQuorum(Consensus::LLMQType llmqType, gsl::not_null pindex, bool populate_cache = true) const; /// Returns the start offset for the masternode with the given proTxHash. This offset is applied when picking data recovery members of a quorum's /// memberlist and is calculated based on a list of all member of all active quorums for the given llmqType in a way that each member /// should receive the same number of request if all active llmqType members requests data from one llmqType quorum.