fix(llmq): mark mns "bad" based on the failed connect attempts count (#4910)

* fix(llmq): mark mns "bad" based on the failed connect attempts count

Avoid using "last success time" as a proxy

* fix(tests): tweak feature_llmq_simplepose.py
This commit is contained in:
UdjinM6 2022-07-14 21:38:02 +03:00 committed by GitHub
parent 04b7bcf52a
commit 32051a35c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 14 deletions

View File

@ -473,10 +473,9 @@ void CDKGSession::VerifyConnectionAndMinProtoVersions() const
logger.Batch("%s does not have min proto version %d (has %d)", m->dmn->proTxHash.ToString(), MIN_MASTERNODE_PROTO_VERSION, it->second);
}
auto lastOutbound = mmetaman.GetMetaInfo(m->dmn->proTxHash)->GetLastOutboundSuccess();
if (GetAdjustedTime() - lastOutbound > 60 * 60) {
if (mmetaman.GetMetaInfo(m->dmn->proTxHash)->OutboundFailedTooManyTimes()) {
m->badConnection = true;
logger.Batch("%s no outbound connection since %d seconds", m->dmn->proTxHash.ToString(), GetAdjustedTime() - lastOutbound);
logger.Batch("%s failed to connect to it too many times", m->dmn->proTxHash.ToString());
}
}
}

View File

@ -2517,8 +2517,7 @@ void CConnman::ThreadOpenMasternodeConnections()
LogPrint(BCLog::NET_NETCONN, "CConnman::%s -- connection failed for masternode %s, service=%s\n", __func__, connectToDmn->proTxHash.ToString(), connectToDmn->pdmnState->addr.ToString(false));
// Will take a few consecutive failed attempts to PoSe-punish a MN.
if (mmetaman.GetMetaInfo(connectToDmn->proTxHash)->OutboundFailedTooManyTimes()) {
LogPrint(BCLog::NET_NETCONN, "CConnman::%s -- failed to connect to masternode %s too many times, resetting outbound success time\n", __func__, connectToDmn->proTxHash.ToString());
mmetaman.GetMetaInfo(connectToDmn->proTxHash)->SetLastOutboundSuccess(0);
LogPrint(BCLog::NET_NETCONN, "CConnman::%s -- failed to connect to masternode %s too many times\n", __func__, connectToDmn->proTxHash.ToString());
}
}
}

View File

@ -66,7 +66,7 @@ class LLMQSimplePoSeTest(DashTestFramework):
def isolate_mn(self, mn):
mn.node.setnetworkactive(False)
wait_until(lambda: mn.node.getconnectioncount() == 0)
return True
return True, True
def close_mn_port(self, mn):
self.stop_node(mn.node.index)
@ -77,14 +77,14 @@ class LLMQSimplePoSeTest(DashTestFramework):
if mn2 is not mn:
connect_nodes(mn.node, mn2.node.index)
self.reset_probe_timeouts()
return False
return False, False
def force_old_mn_proto(self, mn):
self.stop_node(mn.node.index)
self.start_masternode(mn, ["-pushversion=70216"])
connect_nodes(mn.node, 0)
self.reset_probe_timeouts()
return False
return False, True
def test_no_banning(self, expected_connections=None):
for i in range(3):
@ -98,13 +98,18 @@ class LLMQSimplePoSeTest(DashTestFramework):
expected_contributors = len(mninfos_online)
for i in range(2):
mn = mninfos_valid.pop()
went_offline = invalidate_proc(mn)
went_offline, instant_ban = invalidate_proc(mn)
if went_offline:
mninfos_online.remove(mn)
expected_contributors -= 1
t = time.time()
while (not self.check_banned(mn)) and (time.time() - t) < 120:
# NOTE: Min PoSe penalty is 100 (see CDeterministicMNList::CalcMaxPoSePenalty()),
# so nodes are PoSe-banned in the same DKG in which they misbehave, without being PoSe-punished first.
if not instant_ban:
# it's ok to miss probes/quorum connections up to 5 times
for i in range(5):
self.reset_probe_timeouts()
self.mine_quorum(expected_connections=expected_connections, expected_members=expected_contributors, expected_contributions=expected_contributors, expected_complaints=0, expected_commitments=expected_contributors, mninfos_online=mninfos_online, mninfos_valid=mninfos_valid)
self.reset_probe_timeouts()
self.mine_quorum(expected_connections=expected_connections, expected_members=expected_contributors, expected_contributions=expected_contributors, expected_complaints=expected_contributors-1, expected_commitments=expected_contributors, mninfos_online=mninfos_online, mninfos_valid=mninfos_valid)
@ -144,7 +149,7 @@ class LLMQSimplePoSeTest(DashTestFramework):
def reset_probe_timeouts(self):
# Make sure all masternodes will reconnect/re-probe
self.bump_mocktime(50 * 60 + 1)
self.bump_mocktime(10 * 60 + 1)
# Sleep a couple of seconds to let mn sync tick to happen
time.sleep(2)