From b79c3e01ed72b21724f4cddffc3548863ead7be5 Mon Sep 17 00:00:00 2001 From: UdjinM6 Date: Wed, 28 Jul 2021 14:10:08 +0300 Subject: [PATCH] Resolve cs_main vs governance vs masternode-sync deadlocks (#4293) --- src/masternode/masternode-sync.cpp | 86 ++++++++++++++++++------------ 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/src/masternode/masternode-sync.cpp b/src/masternode/masternode-sync.cpp index a6f2ac6b96..3ef3cb7077 100644 --- a/src/masternode/masternode-sync.cpp +++ b/src/masternode/masternode-sync.cpp @@ -139,23 +139,22 @@ void CMasternodeSync::ProcessTick(CConnman& connman) } nTimeLastProcess = GetTime(); + std::vector vNodesCopy = connman.CopyNodeVector(CConnman::FullyConnectedOnly); // gradually request the rest of the votes after sync finished if(IsSynced()) { - std::vector vNodesCopy = connman.CopyNodeVector(CConnman::FullyConnectedOnly); governance.RequestGovernanceObjectVotes(vNodesCopy, connman); connman.ReleaseNodeVector(vNodesCopy); return; } + { LOCK(cs); // Calculate "progress" for LOG reporting / GUI notification double nSyncProgress = double(nTriedPeerCount + (nCurrentAsset - 1) * 8) / (8*4); LogPrint(BCLog::MNSYNC, "CMasternodeSync::ProcessTick -- nTick %d nCurrentAsset %d nTriedPeerCount %d nSyncProgress %f\n", nTick, nCurrentAsset, nTriedPeerCount, nSyncProgress); uiInterface.NotifyAdditionalDataSyncProgressChanged(nSyncProgress); - std::vector vNodesCopy = connman.CopyNodeVector(CConnman::FullyConnectedOnly); - for (auto& pnode : vNodesCopy) { CNetMsgMaker msgMaker(pnode->GetSendVersion()); @@ -256,37 +255,10 @@ void CMasternodeSync::ProcessTick(CConnman& connman) return; } - // only request obj sync once from each peer, then request votes on per-obj basis + // only request obj sync once from each peer if(netfulfilledman.HasFulfilledRequest(pnode->addr, "governance-sync")) { - int nObjsLeftToAsk = governance.RequestGovernanceObjectVotes(pnode, connman); - static int64_t nTimeNoObjectsLeft = 0; - // check for data - if(nObjsLeftToAsk == 0) { - static int nLastTick = 0; - static int nLastVotes = 0; - if(nTimeNoObjectsLeft == 0) { - // asked all objects for votes for the first time - nTimeNoObjectsLeft = GetTime(); - } - // make sure the condition below is checked only once per tick - if(nLastTick == nTick) continue; - if(GetTime() - nTimeNoObjectsLeft > MASTERNODE_SYNC_TIMEOUT_SECONDS && - governance.GetVoteCount() - nLastVotes < std::max(int(0.0001 * nLastVotes), MASTERNODE_SYNC_TICK_SECONDS) - ) { - // We already asked for all objects, waited for MASTERNODE_SYNC_TIMEOUT_SECONDS - // after that and less then 0.01% or MASTERNODE_SYNC_TICK_SECONDS - // (i.e. 1 per second) votes were received during the last tick. - // We can be pretty sure that we are done syncing. - LogPrintf("CMasternodeSync::ProcessTick -- nTick %d nCurrentAsset %d -- asked for all objects, nothing to do\n", nTick, nCurrentAsset); - // reset nTimeNoObjectsLeft to be able to use the same condition on resync - nTimeNoObjectsLeft = 0; - SwitchToNextAsset(connman); - connman.ReleaseNodeVector(vNodesCopy); - return; - } - nLastTick = nTick; - nLastVotes = governance.GetVoteCount(); - } + // will request votes on per-obj basis from each node in a separate loop below + // to avoid deadlocks here continue; } netfulfilledman.AddFulfilledRequest(pnode->addr, "governance-sync"); @@ -296,11 +268,55 @@ void CMasternodeSync::ProcessTick(CConnman& connman) SendGovernanceSyncRequest(pnode, connman); - connman.ReleaseNodeVector(vNodesCopy); - return; //this will cause each peer to get one request each six seconds for the various assets we need + break; //this will cause each peer to get one request each six seconds for the various assets we need } } } + } // cs + + + if (WITH_LOCK(cs, return nCurrentAsset != MASTERNODE_SYNC_GOVERNANCE)) { + // looped through all nodes and not syncing governance yet/already, release them + connman.ReleaseNodeVector(vNodesCopy); + return; + } + + // request votes on per-obj basis from each node + for (auto& pnode : vNodesCopy) { + if(!netfulfilledman.HasFulfilledRequest(pnode->addr, "governance-sync")) { + continue; // to early for this node + } + int nObjsLeftToAsk = governance.RequestGovernanceObjectVotes(pnode, connman); + static int64_t nTimeNoObjectsLeft = 0; + // check for data + if(nObjsLeftToAsk == 0) { + static int nLastTick = 0; + static int nLastVotes = 0; + if(nTimeNoObjectsLeft == 0) { + // asked all objects for votes for the first time + nTimeNoObjectsLeft = GetTime(); + } + // make sure the condition below is checked only once per tick + if(nLastTick == nTick) continue; + if(GetTime() - nTimeNoObjectsLeft > MASTERNODE_SYNC_TIMEOUT_SECONDS && + governance.GetVoteCount() - nLastVotes < std::max(int(0.0001 * nLastVotes), MASTERNODE_SYNC_TICK_SECONDS) + ) { + // We already asked for all objects, waited for MASTERNODE_SYNC_TIMEOUT_SECONDS + // after that and less then 0.01% or MASTERNODE_SYNC_TICK_SECONDS + // (i.e. 1 per second) votes were received during the last tick. + // We can be pretty sure that we are done syncing. + LogPrintf("CMasternodeSync::ProcessTick -- nTick %d nCurrentAsset %d -- asked for all objects, nothing to do\n", nTick, MASTERNODE_SYNC_GOVERNANCE); + // reset nTimeNoObjectsLeft to be able to use the same condition on resync + nTimeNoObjectsLeft = 0; + SwitchToNextAsset(connman); + connman.ReleaseNodeVector(vNodesCopy); + return; + } + nLastTick = nTick; + nLastVotes = governance.GetVoteCount(); + } + } + // looped through all nodes, release them connman.ReleaseNodeVector(vNodesCopy); }