Use exponential backoff timeouts for recovery (#3535)

The recovery timeout starts at 2 seconds and doubles with each attempt, capped at 10 seconds.
This commit is contained in:
Alexander Block 2020-06-24 10:02:57 +02:00 committed by GitHub
parent e015820d6a
commit de099fe7a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 6 deletions

View File

@ -1028,7 +1028,7 @@ void CSigSharesManager::CollectSigSharesToSend(std::unordered_map<NodeId, std::v
proTxToNode.emplace(pnode->verifiedProRegTxHash, pnode);
}
auto curTime = GetTime();
auto curTime = GetTime<std::chrono::milliseconds>().count();
for (auto& p : signedSessions) {
if (p.second.attempt > p.second.quorum->params.recoveryMembers) {
@ -1036,7 +1036,9 @@ void CSigSharesManager::CollectSigSharesToSend(std::unordered_map<NodeId, std::v
}
if (curTime >= p.second.nextAttemptTime) {
p.second.nextAttemptTime = curTime + SEND_FOR_RECOVERY_TIMEOUT;
int64_t waitTime = exp2(p.second.attempt) * EXP_SEND_FOR_RECOVERY_TIMEOUT;
waitTime = std::min(MAX_SEND_FOR_RECOVERY_TIMEOUT, waitTime);
p.second.nextAttemptTime = curTime + waitTime;
auto dmn = SelectMemberForRecovery(p.second.quorum, p.second.sigShare.id, p.second.attempt);
p.second.attempt++;

View File

@ -364,7 +364,8 @@ class CSigSharesManager : public CRecoveredSigsListener
// 400 is the maximum quorum size, so this is also the maximum number of sigs we need to support
const size_t MAX_MSGS_TOTAL_BATCHED_SIGS = 400;
const int64_t SEND_FOR_RECOVERY_TIMEOUT = 1;
const int64_t EXP_SEND_FOR_RECOVERY_TIMEOUT = 2000;
const int64_t MAX_SEND_FOR_RECOVERY_TIMEOUT = 10000;
const size_t MAX_MSGS_SIG_SHARES = 32;
private:

View File

@ -117,9 +117,9 @@ class LLMQSigningTest(DashTestFramework):
# Make sure node0 has received qsendrecsigs from the previously isolated node
mn.node.ping()
wait_until(lambda: all('pingwait' not in peer for peer in mn.node.getpeerinfo()))
# Let 1 second pass so that the next node is used for recovery, which should succeed
self.bump_mocktime(1)
wait_for_sigs(True, False, True, 5)
# Let 2 seconds pass so that the next node is used for recovery, which should succeed
self.bump_mocktime(2)
wait_for_sigs(True, False, True, 2)
if __name__ == '__main__':
LLMQSigningTest().main()