fix: ScanQuorums should not start cache population for outdated quorums (#5784)

## Issue being fixed or feature implemented Cache population for old quorums is a cpu heavy operation and should be avoided for inactive quorums _at least_ oin `ScanQuorums`. This issue is critical for testnet and other small network because every mn participate in almost every platform quorum and cache population for 2 months of quorums can easily block everything for 15+ minutes on a 4 cpu node. On mainnet quorum distribution is much better but it's still a small waste of cpu (or not so small for unlucky nodes). #5761 follow-up ## What was done? Do not start cache population for outdated quorums, improve logs in `StartCachePopulatorThread` to make it easier to see what's going on. ## How Has This Been Tested? run a mn on testnet ## Breaking Changes n/a ## Checklist: - [x] I have performed a self-review of my own code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have added or updated relevant unit/integration/functional/e2e tests - [ ] I have made corresponding changes to the documentation - [x] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_
2024-12-24 19:42:46 +01:00 · 2023-12-22 22:56:43 +03:00 · 2023-12-22 22:56:43 +03:00 · b3a1c8b9c6
commit b3a1c8b9c6
parent 71e7658fdf
2 changed files with 18 additions and 9 deletions
--- a/src/llmq/quorums.cpp
+++ b/src/llmq/quorums.cpp
@ -362,7 +362,7 @@ void CQuorumManager::CheckQuorumConnections(const Consensus::LLMQParams& llmqPar
    }
 }

-CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex) const
+CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex, bool populate_cache) const
 {
    const uint256& quorumHash{pQuorumBaseBlockIndex->GetBlockHash()};
    uint256 minedBlockHash;
@ -392,7 +392,7 @@ CQuorumPtr CQuorumManager::BuildQuorumFromCommitment(const Consensus::LLMQType l
        }
    }

-    if (hasValidVvec) {
+    if (hasValidVvec && populate_cache) {
        // pre-populate caches in the background
        // recovering public key shares is quite expensive and would result in serious lags for the first few signing
        // sessions if the shares would be calculated on-demand
@ -572,7 +572,9 @@ std::vector<CQuorumCPtr> CQuorumManager::ScanQuorums(Consensus::LLMQType llmqTyp

    for (auto& pQuorumBaseBlockIndex : pQuorumBaseBlockIndexes) {
        assert(pQuorumBaseBlockIndex);
-        auto quorum = GetQuorum(llmqType, pQuorumBaseBlockIndex);
+        // populate cache for keepOldConnections most recent quorums only
+        bool populate_cache = vecResultQuorums.size() < llmq_params_opt->keepOldConnections;
+        auto quorum = GetQuorum(llmqType, pQuorumBaseBlockIndex, populate_cache);
        assert(quorum != nullptr);
        vecResultQuorums.emplace_back(quorum);
    }
@ -602,7 +604,7 @@ CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, const uint25
    return GetQuorum(llmqType, pQuorumBaseBlockIndex);
 }

-CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex) const
+CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex, bool populate_cache) const
 {
    auto quorumHash = pQuorumBaseBlockIndex->GetBlockHash();

@ -617,7 +619,7 @@ CQuorumCPtr CQuorumManager::GetQuorum(Consensus::LLMQType llmqType, gsl::not_nul
        return pQuorum;
    }

-    return BuildQuorumFromCommitment(llmqType, pQuorumBaseBlockIndex);
+    return BuildQuorumFromCommitment(llmqType, pQuorumBaseBlockIndex, populate_cache);
 }

 size_t CQuorumManager::GetQuorumRecoveryStartOffset(const CQuorumCPtr pQuorum, const CBlockIndex* pIndex) const
@ -850,7 +852,10 @@ void CQuorumManager::StartCachePopulatorThread(const CQuorumCPtr pQuorum) const
    }

    cxxtimer::Timer t(true);
-    LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- start\n");
+    LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- type=%d height=%d hash=%s start\n",
+            ToUnderlying(pQuorum->params.type),
+            pQuorum->m_quorum_base_block_index->nHeight,
+            pQuorum->m_quorum_base_block_index->GetBlockHash().ToString());

    // when then later some other thread tries to get keys, it will be much faster
    workerPool.push([pQuorum, t, this](int threadId) {
@ -862,7 +867,11 @@ void CQuorumManager::StartCachePopulatorThread(const CQuorumCPtr pQuorum) const
                pQuorum->GetPubKeyShare(i);
            }
        }
-        LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- done. time=%d\n", t.count());
+        LogPrint(BCLog::LLMQ, "CQuorumManager::StartCachePopulatorThread -- type=%d height=%d hash=%s done. time=%d\n",
+                ToUnderlying(pQuorum->params.type),
+                pQuorum->m_quorum_base_block_index->nHeight,
+                pQuorum->m_quorum_base_block_index->GetBlockHash().ToString(),
+                t.count());
    });
 }

--- a/src/llmq/quorums.h
+++ b/src/llmq/quorums.h
@ -267,10 +267,10 @@ private:
    // all private methods here are cs_main-free
    void CheckQuorumConnections(const Consensus::LLMQParams& llmqParams, const CBlockIndex *pindexNew) const;

-    CQuorumPtr BuildQuorumFromCommitment(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex) const;
+    CQuorumPtr BuildQuorumFromCommitment(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pQuorumBaseBlockIndex, bool populate_cache) const;
    bool BuildQuorumContributions(const CFinalCommitmentPtr& fqc, const std::shared_ptr<CQuorum>& quorum) const;

-    CQuorumCPtr GetQuorum(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pindex) const;
+    CQuorumCPtr GetQuorum(Consensus::LLMQType llmqType, gsl::not_null<const CBlockIndex*> pindex, bool populate_cache = true) const;
    /// Returns the start offset for the masternode with the given proTxHash. This offset is applied when picking data recovery members of a quorum's
    /// memberlist and is calculated based on a list of all member of all active quorums for the given llmqType in a way that each member
    /// should receive the same number of request if all active llmqType members requests data from one llmqType quorum.