Multiple fixes and optimizations for LLMQs and ChainLocks (#2724)

* Indicate success when signing was unnecessary

* Fix typo in name of LLMQ_400_60

* Move RemoveAskFor call for CLSIGs into ProcessNewChainLock

In case we got INV items for the same CLSIG that we recreated through
HandleNewRecoveredSig, (re-)requesting of the CLSIG from other peers
becomes unnecessary.

* Move Cleanup() call in CChainLocksHandler::UpdatedBlockTip up

We bail out early in a few situations from this method, so that Cleanup()
might not be called while its at the bottom.

* Bail out from CChainLocksHandler::UpdatedBlockTip if we already got the CLSIG

* Call RemoveAskFor when QFCOMMITMENT was received

Otherwise we might end up re-requesting it for a very long time when the
commitment INV was received shortly before it got mined.

* Call RemoveSigSharesForSession when a recovered sig is received

Otherwise we end up with session data in node states lingering around until
a fake "timeout" occurs (can be seen in the logs).

* Better handling of false-positive conflicts in CSigningManager

The old code was emitting a lot of messages in logs as it treated sigs
for exactly the same session as a conflict. This commit fixes this by
looking at the signHash before logging.

Also handle a corner-case where a recovered sig might be deleted between
the HasRecoveredSigForId and GetRecoveredSigById call.

* Don't run into session timeout when sig shares come in slow

Instead of just tracking when the first share was received, we now also
track when the last (non-duplicate) share was received. Sessios will now
timeout 5 minutes after the first share arrives, or 1 minute after the last
one arrived.
This commit is contained in:
Alexander Block 2019-02-27 14:10:12 +01:00 committed by UdjinM6
parent fcd3b4fd49
commit f305cf77b6
7 changed files with 90 additions and 24 deletions

View File

@ -145,7 +145,7 @@ static Consensus::LLMQParams llmq50_60 = {
static Consensus::LLMQParams llmq400_60 = {
.type = Consensus::LLMQ_400_60,
.name = "llmq_400_51",
.name = "llmq_400_60",
.size = 400,
.minSize = 300,
.threshold = 240,

View File

@ -31,6 +31,12 @@ void CQuorumBlockProcessor::ProcessMessage(CNode* pfrom, const std::string& strC
CFinalCommitment qc;
vRecv >> qc;
auto hash = ::SerializeHash(qc);
{
LOCK(cs_main);
connman.RemoveAskFor(hash);
}
if (qc.IsNull()) {
LOCK(cs_main);
LogPrintf("CQuorumBlockProcessor::%s -- null commitment from peer=%d\n", __func__, pfrom->id);

View File

@ -75,17 +75,17 @@ void CChainLocksHandler::ProcessMessage(CNode* pfrom, const std::string& strComm
auto hash = ::SerializeHash(clsig);
{
LOCK(cs_main);
connman.RemoveAskFor(hash);
}
ProcessNewChainLock(pfrom->id, clsig, hash);
}
}
void CChainLocksHandler::ProcessNewChainLock(NodeId from, const llmq::CChainLockSig& clsig, const uint256& hash)
{
{
LOCK(cs_main);
g_connman->RemoveAskFor(hash);
}
{
LOCK(cs);
if (!seenChainLocks.emplace(hash, GetTimeMillis()).second) {
@ -188,6 +188,8 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
return;
}
Cleanup();
// DIP8 defines a process called "Signing attempts" which should run before the CLSIG is finalized
// To simplify the initial implementation, we skip this process and directly try to create a CLSIG
// This will fail when multiple blocks compete, but we accept this for the initial implementation.
@ -199,6 +201,12 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
{
LOCK(cs);
if (bestChainLockBlockIndex == pindexNew) {
// we first got the CLSIG, then the header, and then the block was connected.
// In this case there is no need to continue here.
return;
}
if (InternalHasConflictingChainLock(pindexNew->nHeight, pindexNew->GetBlockHash())) {
if (!inEnforceBestChainLock) {
// we accepted this block when there was no lock yet, but now a conflicting lock appeared. Invalidate it.
@ -224,8 +232,6 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
}
quorumSigningManager->AsyncSignIfMember(Params().GetConsensus().llmqChainLocks, requestId, msgHash);
Cleanup();
}
// WARNING: cs_main and cs should not be held!

View File

@ -58,6 +58,7 @@ void StartLLMQSystem()
quorumDKGSessionManager->StartMessageHandlerPool();
}
if (quorumSigSharesManager) {
quorumSigSharesManager->RegisterAsRecoveredSigsListener();
quorumSigSharesManager->StartWorkerThread();
}
if (chainLocksHandler) {
@ -72,6 +73,7 @@ void StopLLMQSystem()
}
if (quorumSigSharesManager) {
quorumSigSharesManager->StopWorkerThread();
quorumSigSharesManager->UnregisterAsRecoveredSigsListener();
}
if (quorumDKGSessionManager) {
quorumDKGSessionManager->StopMessageHandlerPool();

View File

@ -500,11 +500,25 @@ void CSigningManager::ProcessRecoveredSig(NodeId nodeId, const CRecoveredSig& re
signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), nodeId);
if (db.HasRecoveredSigForId(llmqType, recoveredSig.id)) {
// this should really not happen, as each masternode is participating in only one vote,
// even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id
LogPrintf("CSigningManager::%s -- conflicting recoveredSig for id=%s, msgHash=%s\n", __func__,
recoveredSig.id.ToString(), recoveredSig.msgHash.ToString());
return;
CRecoveredSig otherRecoveredSig;
if (db.GetRecoveredSigById(llmqType, recoveredSig.id, otherRecoveredSig)) {
auto otherSignHash = CLLMQUtils::BuildSignHash(recoveredSig);
if (signHash != otherSignHash) {
// this should really not happen, as each masternode is participating in only one vote,
// even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id
LogPrintf("CSigningManager::%s -- conflicting recoveredSig for signHash=%s, id=%s, msgHash=%s, otherSignHash=%s\n", __func__,
signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), otherSignHash.ToString());
} else {
// Looks like we're trying to process a recSig that is already known. This might happen if the same
// recSig comes in through regular QRECSIG messages and at the same time through some other message
// which allowed to reconstruct a recSig (e.g. IXLOCK). In this case, just bail out.
}
return;
} else {
// This case is very unlikely. It can only happen when cleanup caused this specific recSig to vanish
// between the HasRecoveredSigForId and GetRecoveredSigById call. If that happens, treat it as if we
// never had that recSig
}
}
db.WriteRecoveredSig(recoveredSig);
@ -559,14 +573,19 @@ bool CSigningManager::AsyncSignIfMember(Consensus::LLMQType llmqType, const uint
if (db.HasVotedOnId(llmqType, id)) {
uint256 prevMsgHash;
db.GetVoteForId(llmqType, id, prevMsgHash);
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__,
id.ToString(), prevMsgHash.ToString(), msgHash.ToString());
if (msgHash != prevMsgHash) {
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__,
id.ToString(), prevMsgHash.ToString(), msgHash.ToString());
} else {
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting again.\n", __func__,
id.ToString(), prevMsgHash.ToString());
}
return false;
}
if (db.HasRecoveredSigForId(llmqType, id)) {
// no need to sign it if we already have a recovered sig
return false;
return true;
}
db.WriteVoteForId(llmqType, id, msgHash);
}

View File

@ -179,6 +179,16 @@ void CSigSharesManager::StopWorkerThread()
}
}
void CSigSharesManager::RegisterAsRecoveredSigsListener()
{
quorumSigningManager->RegisterRecoveredSigsListener(this);
}
void CSigSharesManager::UnregisterAsRecoveredSigsListener()
{
quorumSigningManager->UnregisterRecoveredSigsListener(this);
}
void CSigSharesManager::InterruptWorkerThread()
{
workInterrupt();
@ -557,7 +567,16 @@ void CSigSharesManager::ProcessSigShare(NodeId nodeId, const CSigShare& sigShare
}
sigSharesToAnnounce.Add(sigShare.GetKey(), true);
firstSeenForSessions.emplace(sigShare.GetSignHash(), GetTimeMillis());
auto it = timeSeenForSessions.find(sigShare.GetSignHash());
if (it == timeSeenForSessions.end()) {
auto t = GetTimeMillis();
// insert first-seen and last-seen time
timeSeenForSessions.emplace(sigShare.GetSignHash(), std::make_pair(t, t));
} else {
// update last-seen time
it->second.second = GetTimeMillis();
}
if (!quorumNodes.empty()) {
// don't announce and wait for other nodes to request this share and directly send it to them
@ -958,11 +977,12 @@ void CSigSharesManager::Cleanup()
// Remove sessions which timed out
std::unordered_set<uint256> timeoutSessions;
for (auto& p : firstSeenForSessions) {
for (auto& p : timeSeenForSessions) {
auto& signHash = p.first;
int64_t time = p.second;
int64_t firstSeenTime = p.second.first;
int64_t lastSeenTime = p.second.second;
if (now - time >= SIGNING_SESSION_TIMEOUT) {
if (now - firstSeenTime >= SESSION_TOTAL_TIMEOUT || now - lastSeenTime >= SESSION_NEW_SHARES_TIMEOUT) {
timeoutSessions.emplace(signHash);
}
}
@ -1044,7 +1064,7 @@ void CSigSharesManager::RemoveSigSharesForSession(const uint256& signHash)
sigSharesRequested.EraseAllForSignHash(signHash);
sigSharesToAnnounce.EraseAllForSignHash(signHash);
sigShares.EraseAllForSignHash(signHash);
firstSeenForSessions.erase(signHash);
timeSeenForSessions.erase(signHash);
}
void CSigSharesManager::RemoveBannedNodeStates()
@ -1192,4 +1212,10 @@ void CSigSharesManager::Sign(const CQuorumCPtr& quorum, const uint256& id, const
ProcessSigShare(-1, sigShare, *g_connman, quorum);
}
void CSigSharesManager::HandleNewRecoveredSig(const llmq::CRecoveredSig& recoveredSig)
{
LOCK(cs);
RemoveSigSharesForSession(CLLMQUtils::BuildSignHash(recoveredSig));
}
}

View File

@ -332,9 +332,10 @@ public:
void RemoveSession(const uint256& signHash);
};
class CSigSharesManager
class CSigSharesManager : public CRecoveredSigsListener
{
static const int64_t SIGNING_SESSION_TIMEOUT = 60 * 1000;
static const int64_t SESSION_NEW_SHARES_TIMEOUT = 60 * 1000;
static const int64_t SESSION_TOTAL_TIMEOUT = 5 * 60 * 1000;
static const int64_t SIG_SHARE_REQUEST_TIMEOUT = 5 * 1000;
private:
@ -344,7 +345,9 @@ private:
CThreadInterrupt workInterrupt;
SigShareMap<CSigShare> sigShares;
std::unordered_map<uint256, int64_t> firstSeenForSessions;
// stores time of first and last receivedSigShare. Used to detect timeouts
std::unordered_map<uint256, std::pair<int64_t, int64_t>> timeSeenForSessions;
std::unordered_map<NodeId, CSigSharesNodeState> nodeStates;
SigShareMap<std::pair<NodeId, int64_t>> sigSharesRequested;
@ -363,6 +366,8 @@ public:
void StartWorkerThread();
void StopWorkerThread();
void RegisterAsRecoveredSigsListener();
void UnregisterAsRecoveredSigsListener();
void InterruptWorkerThread();
public:
@ -371,6 +376,8 @@ public:
void AsyncSign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash);
void Sign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash);
void HandleNewRecoveredSig(const CRecoveredSig& recoveredSig);
private:
void ProcessMessageSigSharesInv(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman);
void ProcessMessageGetSigShares(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman);