fix: Improve quorum data caching and cleanup (#5731)

## Issue being fixed or feature implemented

## What was done?

## How Has This Been Tested?


## Breaking Changes


## Checklist:
- [x] I have performed a self-review of my own code
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] I have added or updated relevant unit/integration/functional/e2e
tests
- [ ] I have made corresponding changes to the documentation
- [x] I have assigned this pull request to a milestone _(for repository
code-owners and collaborators only)_

---------

Co-authored-by: PastaPastaPasta <6443210+PastaPastaPasta@users.noreply.github.com>
This commit is contained in:
UdjinM6 2023-11-29 17:17:58 +03:00 committed by GitHub
parent 6c57cc26e2
commit 00a076dd35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 31 deletions

View File

@ -104,6 +104,13 @@ struct LLMQParams {
// For rotated quorums it should be equal to 2 x active quorums set.
int keepOldConnections;
// The number of quorums for which we should keep keys. Usually it's equal to keepOldConnections.
// Unlike for other quorum types, we want to keep data (secret key shares and vvec)
// for Platform quorums for much longer, because Platform can be restarted and
// it must still be able to re-sign afterwards.
int keepOldKeys;
// How many members should we try to send all sigShares to before we give up.
int recoveryMembers;
};
@ -138,6 +145,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 3,
.keepOldKeys = 3,
.recoveryMembers = 3,
},
@ -163,6 +171,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 3,
.keepOldKeys = 3,
.recoveryMembers = 3,
},
@ -188,6 +197,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 3,
.keepOldKeys = 3,
.recoveryMembers = 3,
},
@ -213,6 +223,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 4,
.keepOldKeys = 4,
.recoveryMembers = 3,
},
@ -238,6 +249,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 4,
.keepOldKeys = 24 * 30 * 2, // 2 months of quorums
.recoveryMembers = 3,
},
@ -263,6 +275,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 4, // just a few ones to allow easier testing
.keepOldConnections = 5,
.keepOldKeys = 5,
.recoveryMembers = 6,
},
@ -288,6 +301,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 2, // just a few ones to allow easier testing
.keepOldConnections = 4,
.keepOldKeys = 4,
.recoveryMembers = 4,
},
@ -313,6 +327,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 4, // just a few ones to allow easier testing
.keepOldConnections = 5,
.keepOldKeys = 24 * 30 * 2, // 2 months of quorums
.recoveryMembers = 6,
},
@ -338,6 +353,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 24, // a full day worth of LLMQs
.keepOldConnections = 25,
.keepOldKeys = 25,
.recoveryMembers = 25,
},
@ -363,6 +379,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 32,
.keepOldConnections = 64,
.keepOldKeys = 64,
.recoveryMembers = 25,
},
@ -389,6 +406,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 4, // two days worth of LLMQs
.keepOldConnections = 5,
.keepOldKeys = 5,
.recoveryMembers = 100,
},
@ -416,6 +434,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 4, // four days worth of LLMQs
.keepOldConnections = 5,
.keepOldKeys = 5,
.recoveryMembers = 100,
},
@ -443,6 +462,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 24, // a full day worth of LLMQs
.keepOldConnections = 25,
.keepOldKeys = 24 * 30 * 2, // 2 months of quorums
.recoveryMembers = 50,
},
@ -470,6 +490,7 @@ static constexpr std::array<LLMQParams, 14> available_llmqs = {
.signingActiveQuorumCount = 24, // a full day worth of LLMQs
.keepOldConnections = 25,
.keepOldKeys = 24 * 30 * 2, // 2 months of quorums
.recoveryMembers = 12,
},

View File

@ -200,8 +200,8 @@ CQuorumManager::CQuorumManager(CBLSWorker& _blsWorker, CChainState& chainstate,
m_mn_sync(mn_sync),
m_peerman(peerman)
{
utils::InitQuorumsCache(mapQuorumsCache);
utils::InitQuorumsCache(scanQuorumsCache);
utils::InitQuorumsCache(mapQuorumsCache, false);
utils::InitQuorumsCache(scanQuorumsCache, false);
quorumThreadInterrupt.reset();
}
@ -296,7 +296,7 @@ void CQuorumManager::UpdatedBlockTip(const CBlockIndex* pindexNew, bool fInitial
}
TriggerQuorumDataRecoveryThreads(pindexNew);
CleanupOldQuorumData(pindexNew);
StartCleanupOldQuorumDataThread(pindexNew);
}
void CQuorumManager::CheckQuorumConnections(const Consensus::LLMQParams& llmqParams, const CBlockIndex* pindexNew) const
@ -955,7 +955,7 @@ void CQuorumManager::StartQuorumDataRecoveryThread(const CQuorumCPtr pQuorum, co
});
}
static void DataCleanupHelper(CDBWrapper& db, std::set<uint256> skip_list)
static void DataCleanupHelper(CDBWrapper& db, std::set<uint256> skip_list, bool compact = false)
{
const auto prefixes = {DB_QUORUM_QUORUM_VVEC, DB_QUORUM_SK_SHARE};
@ -989,39 +989,54 @@ static void DataCleanupHelper(CDBWrapper& db, std::set<uint256> skip_list)
db.WriteBatch(batch);
LogPrint(BCLog::LLMQ, "CQuorumManager::%d -- %s removed %d\n", __func__, prefix, count);
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- %s removed %d\n", __func__, prefix, count);
}
pcursor.reset();
if (compact) {
// Avoid using this on regular cleanups; use it on db migrations only.
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- compact start\n", __func__);
db.CompactFull();
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- compact end\n", __func__);
}
}
void CQuorumManager::CleanupOldQuorumData(const CBlockIndex* pIndex) const
void CQuorumManager::StartCleanupOldQuorumDataThread(const CBlockIndex* pIndex) const
{
if (!fMasternodeMode || pIndex == nullptr || (pIndex->nHeight % 576 != 0)) {
// Note: this function is CPU-heavy and we don't want it to be running during DKGs.
// The largest dkgMiningWindowStart for a related quorum type is 42 (LLMQ_60_75).
// At the same time, most quorums use dkgInterval = 24, so the next DKG for them
// (after block 576 + 42) will start at block 576 + 24 * 2. That leaves only a
// 6-block window; it's better to have more room, so we pick the next cycle.
// dkgMiningWindowStart for small quorums is 10, i.e. a safe block to start
// the cleanup is at height 576 + 24 * 2 + 10 = 576 + 58.
if (!fMasternodeMode || pIndex == nullptr || (pIndex->nHeight % 576 != 58)) {
return;
}
cxxtimer::Timer t(/*start=*/ true);
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- start\n", __func__);
// do not block the caller thread
workerPool.push([pIndex, t, this](int threadId) {
std::set<uint256> dbKeysToSkip;
LogPrint(BCLog::LLMQ, "CQuorumManager::%d -- start\n", __func__);
// Platform quorums in all networks are created every 24 blocks (~1h).
// Unlike for other quorum types we want to keep data (secret key shares and vvec)
// for Platform quorums for at least 2 months because Platform can be restarted and
// it must be able to re-sign stuff. During a month, 24 * 30 quorums are created.
constexpr auto numPlatformQuorumsDataToKeep = 24 * 30 * 2;
for (const auto& params : Params().GetConsensus().llmqs) {
auto nQuorumsToKeep = params.type == Params().GetConsensus().llmqTypePlatform ? numPlatformQuorumsDataToKeep : params.keepOldConnections;
const auto vecQuorums = ScanQuorums(params.type, pIndex, nQuorumsToKeep);
for (const auto& pQuorum : vecQuorums) {
if (quorumThreadInterrupt) {
break;
}
for (const auto& pQuorum : ScanQuorums(params.type, pIndex, params.keepOldKeys)) {
dbKeysToSkip.insert(MakeQuorumKey(*pQuorum));
}
}
if (!quorumThreadInterrupt) {
DataCleanupHelper(m_evoDb.GetRawDB(), dbKeysToSkip);
}
LogPrint(BCLog::LLMQ, "CQuorumManager::%d -- done\n", __func__);
LogPrint(BCLog::LLMQ, "CQuorumManager::StartCleanupOldQuorumDataThread -- done. time=%d\n", t.count());
});
}
} // namespace llmq

View File

@ -277,7 +277,7 @@ private:
void StartCachePopulatorThread(const CQuorumCPtr pQuorum) const;
void StartQuorumDataRecoveryThread(const CQuorumCPtr pQuorum, const CBlockIndex* pIndex, uint16_t nDataMask) const;
void CleanupOldQuorumData(const CBlockIndex* pIndex) const;
void StartCleanupOldQuorumDataThread(const CBlockIndex* pIndex) const;
};
extern std::unique_ptr<CQuorumManager> quorumManager;

View File

@ -1085,17 +1085,17 @@ std::map<Consensus::LLMQType, QvvecSyncMode> GetEnabledQuorumVvecSyncEntries()
}
template <typename CacheType>
void InitQuorumsCache(CacheType& cache)
void InitQuorumsCache(CacheType& cache, bool limit_by_connections)
{
for (const auto& llmq : Params().GetConsensus().llmqs) {
cache.emplace(std::piecewise_construct, std::forward_as_tuple(llmq.type),
std::forward_as_tuple(llmq.keepOldConnections));
std::forward_as_tuple(limit_by_connections ? llmq.keepOldConnections : llmq.keepOldKeys));
}
}
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, bool, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, bool, StaticSaltedHasher>>& cache);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::vector<CQuorumCPtr>, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::vector<CQuorumCPtr>, StaticSaltedHasher>>& cache);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>, std::less<Consensus::LLMQType>, std::allocator<std::pair<Consensus::LLMQType const, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>>>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>, std::less<Consensus::LLMQType>, std::allocator<std::pair<Consensus::LLMQType const, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>>>>&);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, int, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, int, StaticSaltedHasher>>& cache);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, bool, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, bool, StaticSaltedHasher>>& cache, bool limit_by_connections);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::vector<CQuorumCPtr>, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::vector<CQuorumCPtr>, StaticSaltedHasher>>& cache, bool limit_by_connections);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>, std::less<Consensus::LLMQType>, std::allocator<std::pair<Consensus::LLMQType const, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>>>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>, std::less<Consensus::LLMQType>, std::allocator<std::pair<Consensus::LLMQType const, unordered_lru_cache<uint256, std::shared_ptr<llmq::CQuorum>, StaticSaltedHasher, 0ul, 0ul>>>>&cache, bool limit_by_connections);
template void InitQuorumsCache<std::map<Consensus::LLMQType, unordered_lru_cache<uint256, int, StaticSaltedHasher>>>(std::map<Consensus::LLMQType, unordered_lru_cache<uint256, int, StaticSaltedHasher>>& cache, bool limit_by_connections);
} // namespace utils

View File

@ -117,7 +117,7 @@ void IterateNodesRandom(NodesContainer& nodeStates, Continue&& cont, Callback&&
}
template <typename CacheType>
void InitQuorumsCache(CacheType& cache);
void InitQuorumsCache(CacheType& cache, bool limit_by_connections = true);
} // namespace utils