Multiple fixes/refactorings for ChainLocks (#2765)

* Print which DKG type aborted

* Don't directly call EnforceBestChainLock and instead schedule the call

Calling EnforceBestChainLock might result in switching chains, which in
turn might end up calling signals, so we get into a recursive call chain.

Better to call EnforceBestChainLock from the scheduler.

* Regularly call EnforceBestChainLock and reset error flags on locked chain

* Don't invalidate blocks from CChainLocksHandler::TrySignChainTip

As the name of this method implies, it's trying to sign something and not
enforce/invalidate chains. Invalidating blocks is the job of
EnforceBestChainLock.

* Only call ActivateBestChain when tip != best CL tip

* Fix unprotected access of bestChainLockBlockIndex and bail out if its null

* Fix ChainLocks tests after changes in enforcement handling

* Only invoke NotifyChainLock signal from EnforceBestChainLock

This ensures that NotifyChainLock is not prematurely called before the
block is fully connected.

* Use a mutex to ensure that only one thread executes ActivateBestChain

It might happen that 2 threads enter ActivateBestChain at the same time
start processing block by block, while randomly switching between threads
so that sometimes one thread processed the block and then another one
processes it. A mutex protects ActivateBestChain now against this race.

* Rename local copy of bestChainLockBlockIndex to currentBestChainLockBlockIndex

* Don't call ActivateBestChain when best CL is part of the main chain
This commit is contained in:
Alexander Block 2019-03-13 14:00:54 +01:00 committed by UdjinM6
parent 8955eb82ef
commit 3a1aeb000e
5 changed files with 65 additions and 64 deletions

View File

@ -68,17 +68,17 @@ class LLMQChainLocksTest(DashTestFramework):
# Keep node connected and let it try to reorg the chain
good_tip = self.nodes[0].getbestblockhash()
self.nodes[0].invalidateblock(self.nodes[0].getbestblockhash())
# Restart it so that it forgets all the chainlocks from the past
stop_node(self.nodes[0], 0)
self.nodes[0] = start_node(0, self.options.tmpdir, self.extra_args)
connect_nodes(self.nodes[0], 1)
self.nodes[0].invalidateblock(self.nodes[0].getbestblockhash())
# Now try to reorg the chain
self.nodes[0].generate(2)
sleep(2)
sleep(6)
assert(self.nodes[1].getbestblockhash() == good_tip)
self.nodes[0].generate(2)
sleep(2)
sleep(6)
assert(self.nodes[1].getbestblockhash() == good_tip)
# Now let the node which is on the wrong chain reorg back to the locked chain

View File

@ -40,7 +40,8 @@ void CChainLocksHandler::Start()
{
quorumSigningManager->RegisterRecoveredSigsListener(this);
scheduler->scheduleEvery([&]() {
// regularely retry signing the current chaintip as it might have failed before due to missing ixlocks
EnforceBestChainLock();
// regularly retry signing the current chaintip as it might have failed before due to missing ixlocks
TrySignChainTip();
}, 5000);
}
@ -151,52 +152,47 @@ void CChainLocksHandler::ProcessNewChainLock(NodeId from, const llmq::CChainLock
bestChainLockBlockIndex = pindex;
}
EnforceBestChainLock();
scheduler->scheduleFromNow([&]() {
EnforceBestChainLock();
}, 0);
LogPrintf("CChainLocksHandler::%s -- processed new CLSIG (%s), peer=%d\n",
__func__, clsig.ToString(), from);
if (lastNotifyChainLockBlockIndex != bestChainLockBlockIndex) {
lastNotifyChainLockBlockIndex = bestChainLockBlockIndex;
GetMainSignals().NotifyChainLock(bestChainLockBlockIndex);
}
}
void CChainLocksHandler::AcceptedBlockHeader(const CBlockIndex* pindexNew)
{
bool doEnforce = false;
{
LOCK2(cs_main, cs);
LOCK2(cs_main, cs);
if (pindexNew->GetBlockHash() == bestChainLock.blockHash) {
LogPrintf("CChainLocksHandler::%s -- block header %s came in late, updating and enforcing\n", __func__, pindexNew->GetBlockHash().ToString());
if (pindexNew->GetBlockHash() == bestChainLock.blockHash) {
LogPrintf("CChainLocksHandler::%s -- block header %s came in late, updating and enforcing\n", __func__, pindexNew->GetBlockHash().ToString());
if (bestChainLock.nHeight != pindexNew->nHeight) {
// Should not happen, same as the conflict check from ProcessNewChainLock.
LogPrintf("CChainLocksHandler::%s -- height of CLSIG (%s) does not match the specified block's height (%d)\n",
__func__, bestChainLock.ToString(), pindexNew->nHeight);
return;
}
bestChainLockBlockIndex = pindexNew;
doEnforce = true;
if (bestChainLock.nHeight != pindexNew->nHeight) {
// Should not happen, same as the conflict check from ProcessNewChainLock.
LogPrintf("CChainLocksHandler::%s -- height of CLSIG (%s) does not match the specified block's height (%d)\n",
__func__, bestChainLock.ToString(), pindexNew->nHeight);
return;
}
}
if (doEnforce) {
EnforceBestChainLock();
// when EnforceBestChainLock is called later, it might end up invalidating other chains but not activating the
// CLSIG locked chain. This happens when only the header is known but the block is still missing yet. The usual
// block processing logic will handle this when the block arrives
bestChainLockBlockIndex = pindexNew;
}
}
void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBlockIndex* pindexFork)
{
// don't call TrySignChainTip directly but instead let the scheduler call it. This way we ensure that cs_main is
// never locked and TrySignChainTip is not called twice in parallel
// never locked and TrySignChainTip is not called twice in parallel. Also avoids recursive calls due to
// EnforceBestChainLock switching chains.
LOCK(cs);
if (tryLockChainTipScheduled) {
return;
}
tryLockChainTipScheduled = true;
scheduler->scheduleFromNow([&]() {
EnforceBestChainLock();
TrySignChainTip();
LOCK(cs);
tryLockChainTipScheduled = false;
@ -231,17 +227,6 @@ void CChainLocksHandler::TrySignChainTip()
{
LOCK(cs);
if (bestChainLockBlockIndex == pindex) {
// we first got the CLSIG, then the header, and then the block was connected.
// In this case there is no need to continue here.
// However, NotifyChainLock might not have been called yet, so call it now if needed
if (lastNotifyChainLockBlockIndex != bestChainLockBlockIndex) {
lastNotifyChainLockBlockIndex = bestChainLockBlockIndex;
GetMainSignals().NotifyChainLock(bestChainLockBlockIndex);
}
return;
}
if (pindex->nHeight == lastSignedHeight) {
// already signed this one
return;
@ -253,12 +238,8 @@ void CChainLocksHandler::TrySignChainTip()
}
if (InternalHasConflictingChainLock(pindex->nHeight, pindex->GetBlockHash())) {
if (!inEnforceBestChainLock) {
// we accepted this block when there was no lock yet, but now a conflicting lock appeared. Invalidate it.
LogPrintf("CChainLocksHandler::%s -- conflicting lock after block was accepted, invalidating now\n",
__func__);
ScheduleInvalidateBlock(pindex);
}
// don't sign if another conflicting CLSIG is already present. EnforceBestChainLock will later enforce
// the correct chain.
return;
}
}
@ -403,23 +384,30 @@ bool CChainLocksHandler::IsTxSafeForMining(const uint256& txid)
}
// WARNING: cs_main and cs should not be held!
// This should also not be called from validation signals, as this might result in recursive calls
void CChainLocksHandler::EnforceBestChainLock()
{
CChainLockSig clsig;
const CBlockIndex* pindex;
const CBlockIndex* currentBestChainLockBlockIndex;
{
LOCK(cs);
clsig = bestChainLockWithKnownBlock;
pindex = bestChainLockBlockIndex;
pindex = currentBestChainLockBlockIndex = this->bestChainLockBlockIndex;
if (!currentBestChainLockBlockIndex) {
// we don't have the header/block, so we can't do anything right now
return;
}
}
bool activateNeeded;
{
LOCK(cs_main);
// Go backwards through the chain referenced by clsig until we find a block that is part of the main chain.
// For each of these blocks, check if there are children that are NOT part of the chain referenced by clsig
// and invalidate each of them.
inEnforceBestChainLock = true; // avoid unnecessary ScheduleInvalidateBlock calls inside UpdatedBlockTip
while (pindex && !chainActive.Contains(pindex)) {
// Invalidate all blocks that have the same prevBlockHash but are not equal to blockHash
auto itp = mapPrevBlockIndex.equal_range(pindex->pprev->GetBlockHash());
@ -434,14 +422,34 @@ void CChainLocksHandler::EnforceBestChainLock()
pindex = pindex->pprev;
}
inEnforceBestChainLock = false;
// In case blocks from the correct chain are invalid at the moment, reconsider them. The only case where this
// can happen right now is when missing superblock triggers caused the main chain to be dismissed first. When
// the trigger later appears, this should bring us to the correct chain eventually. Please note that this does
// NOT enforce invalid blocks in any way, it just causes re-validation.
if (!currentBestChainLockBlockIndex->IsValid()) {
ResetBlockFailureFlags(mapBlockIndex.at(currentBestChainLockBlockIndex->GetBlockHash()));
}
activateNeeded = chainActive.Tip()->GetAncestor(currentBestChainLockBlockIndex->nHeight) != currentBestChainLockBlockIndex;
}
CValidationState state;
if (!ActivateBestChain(state, Params())) {
if (activateNeeded && !ActivateBestChain(state, Params())) {
LogPrintf("CChainLocksHandler::%s -- ActivateBestChain failed: %s\n", __func__, FormatStateMessage(state));
// This should not have happened and we are in a state were it's not safe to continue anymore
assert(false);
}
const CBlockIndex* pindexNotify = nullptr;
{
LOCK(cs_main);
if (lastNotifyChainLockBlockIndex != currentBestChainLockBlockIndex &&
chainActive.Tip()->GetAncestor(currentBestChainLockBlockIndex->nHeight) == currentBestChainLockBlockIndex) {
lastNotifyChainLockBlockIndex = currentBestChainLockBlockIndex;
pindexNotify = currentBestChainLockBlockIndex;
}
}
if (pindexNotify) {
GetMainSignals().NotifyChainLock(pindexNotify);
}
}
@ -471,16 +479,6 @@ void CChainLocksHandler::HandleNewRecoveredSig(const llmq::CRecoveredSig& recove
ProcessNewChainLock(-1, clsig, ::SerializeHash(clsig));
}
void CChainLocksHandler::ScheduleInvalidateBlock(const CBlockIndex* pindex)
{
// Calls to InvalidateBlock and ActivateBestChain might result in re-invocation of the UpdatedBlockTip and other
// signals, so we can't directly call it from signal handlers. We solve this by doing the call from the scheduler
scheduler->scheduleFromNow([this, pindex]() {
DoInvalidateBlock(pindex, true);
}, 0);
}
// WARNING, do not hold cs while calling this method as we'll otherwise run into a deadlock
void CChainLocksHandler::DoInvalidateBlock(const CBlockIndex* pindex, bool activateBestChain)
{

View File

@ -53,7 +53,6 @@ private:
CScheduler* scheduler;
CCriticalSection cs;
bool tryLockChainTipScheduled{false};
std::atomic<bool> inEnforceBestChainLock{false};
uint256 bestChainLockHash;
CChainLockSig bestChainLock;
@ -104,7 +103,6 @@ private:
bool InternalHasChainLock(int nHeight, const uint256& blockHash);
bool InternalHasConflictingChainLock(int nHeight, const uint256& blockHash);
void ScheduleInvalidateBlock(const CBlockIndex* pindex);
void DoInvalidateBlock(const CBlockIndex* pindex, bool activateBestChain);
void Cleanup();

View File

@ -553,7 +553,7 @@ void CDKGSessionHandler::PhaseHandlerThread()
status.aborted = true;
return true;
});
LogPrintf("CDKGSessionHandler::%s -- aborted current DKG session\n", __func__);
LogPrintf("CDKGSessionHandler::%s -- aborted current DKG session for llmq=%s\n", __func__, params.name);
}
}
}

View File

@ -2888,6 +2888,11 @@ bool ActivateBestChain(CValidationState &state, const CChainParams& chainparams,
// sanely for performance or correctness!
AssertLockNotHeld(cs_main);
// make sure that no matter what, only one thread is executing ActivateBestChain. This avoids a race condition when
// validation signals are invoked, which might result in out-of-order execution.
static CCriticalSection cs_activateBestChain;
LOCK(cs_activateBestChain);
CBlockIndex *pindexMostWork = NULL;
CBlockIndex *pindexNewTip = NULL;
do {