Use exponential backoff timeouts for recovery (#3535)

The recovery timeout starts at 2 seconds, doubles with each further attempt, and is capped at 10 seconds.
This commit is contained in:
Alexander Block 2020-06-24 10:02:57 +02:00 committed by GitHub
parent e015820d6a
commit de099fe7a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 6 deletions

View File

@ -1028,7 +1028,7 @@ void CSigSharesManager::CollectSigSharesToSend(std::unordered_map<NodeId, std::v
proTxToNode.emplace(pnode->verifiedProRegTxHash, pnode); proTxToNode.emplace(pnode->verifiedProRegTxHash, pnode);
} }
auto curTime = GetTime(); auto curTime = GetTime<std::chrono::milliseconds>().count();
for (auto& p : signedSessions) { for (auto& p : signedSessions) {
if (p.second.attempt > p.second.quorum->params.recoveryMembers) { if (p.second.attempt > p.second.quorum->params.recoveryMembers) {
@ -1036,7 +1036,9 @@ void CSigSharesManager::CollectSigSharesToSend(std::unordered_map<NodeId, std::v
} }
if (curTime >= p.second.nextAttemptTime) { if (curTime >= p.second.nextAttemptTime) {
p.second.nextAttemptTime = curTime + SEND_FOR_RECOVERY_TIMEOUT; int64_t waitTime = exp2(p.second.attempt) * EXP_SEND_FOR_RECOVERY_TIMEOUT;
waitTime = std::min(MAX_SEND_FOR_RECOVERY_TIMEOUT, waitTime);
p.second.nextAttemptTime = curTime + waitTime;
auto dmn = SelectMemberForRecovery(p.second.quorum, p.second.sigShare.id, p.second.attempt); auto dmn = SelectMemberForRecovery(p.second.quorum, p.second.sigShare.id, p.second.attempt);
p.second.attempt++; p.second.attempt++;

View File

@ -364,7 +364,8 @@ class CSigSharesManager : public CRecoveredSigsListener
// 400 is the maximum quorum size, so this is also the maximum number of sigs we need to support // 400 is the maximum quorum size, so this is also the maximum number of sigs we need to support
const size_t MAX_MSGS_TOTAL_BATCHED_SIGS = 400; const size_t MAX_MSGS_TOTAL_BATCHED_SIGS = 400;
const int64_t SEND_FOR_RECOVERY_TIMEOUT = 1; const int64_t EXP_SEND_FOR_RECOVERY_TIMEOUT = 2000;
const int64_t MAX_SEND_FOR_RECOVERY_TIMEOUT = 10000;
const size_t MAX_MSGS_SIG_SHARES = 32; const size_t MAX_MSGS_SIG_SHARES = 32;
private: private:

View File

@ -117,9 +117,9 @@ class LLMQSigningTest(DashTestFramework):
# Make sure node0 has received qsendrecsigs from the previously isolated node # Make sure node0 has received qsendrecsigs from the previously isolated node
mn.node.ping() mn.node.ping()
wait_until(lambda: all('pingwait' not in peer for peer in mn.node.getpeerinfo())) wait_until(lambda: all('pingwait' not in peer for peer in mn.node.getpeerinfo()))
# Let 1 second pass so that the next node is used for recovery, which should succeed # Let 2 seconds pass so that the next node is used for recovery, which should succeed
self.bump_mocktime(1) self.bump_mocktime(2)
wait_for_sigs(True, False, True, 5) wait_for_sigs(True, False, True, 2)
if __name__ == '__main__': if __name__ == '__main__':
LLMQSigningTest().main() LLMQSigningTest().main()