From 1b0af99fd5e69c0050aa982f854f3247927b8671 Mon Sep 17 00:00:00 2001 From: Konstantin Akimov Date: Mon, 9 Dec 2024 14:49:21 +0700 Subject: [PATCH] fix: wrong lock order, observed locally on top of #6467 with custom changes Assertion failed: detected inconsistent lock order for 'cs_main' in node/blockstorage.cpp:775 (in thread 'httpworker.2'), details in debug log. Previous lock order was: (2) 'cs_main' in rpc/net.cpp:666 (in thread 'httpworker.3') (1) 'm_nodes_mutex' in net.cpp:4754 (in thread 'httpworker.3') Current lock order is: (1) 'm_nodes_mutex' in net.cpp:5102 (in thread 'httpworker.2') (2) 'cs_main' in node/blockstorage.cpp:775 (in thread 'httpworker.2') node0 2024-12-09T07:46:49.907123Z (mocktime: 2014-12-04T18:34:20Z) [httpworker.2] [stacktraces.cpp:528] [PrintCrashInfo] Posix Signal: Aborted ... 15#: (0x5764EB18DE42) sync.cpp:123 - potential_deadlock_detected 16#: (0x5764EB194A7E) sync.cpp:190 - push_lock 17#: (0x5764EB194A7E) sync.cpp:214 - void EnterCritical(char const*, char const*, int, std::recursive_mutex*, bool) 18#: (0x5764EA928642) unique_lock.h:150 - std::unique_lock::try_lock() 19#: (0x5764EA928642) sync.h:168 - UniqueLock, std::unique_lock >::Enter(char const*, char const*, int) 20#: (0x5764EA928642) sync.h:190 - UniqueLock, std::unique_lock >::UniqueLock(AnnotatedMixin&, char const*, char const*, int, bool) 21#: (0x5764EAAD9C3D) chain.h:214 - CBlockIndex::GetBlockPos() const 22#: (0x5764EAAD9C3D) blockstorage.cpp:775 - operator() 23#: (0x5764EAAD9C3D) blockstorage.cpp:775 - ReadBlockFromDisk(CBlock&, CBlockIndex const*, Consensus::Params const&) 24#: (0x5764EADB117A) stl_vector.h:1126 - std::vector, std::allocator > >::operator[](unsigned long) 25#: (0x5764EADB117A) cbtx.cpp:467 - GetNonNullCoinbaseChainlock(CBlockIndex const*) 26#: (0x5764EA9FE4F8) utils.cpp:84 - GetHashModifier 27#: (0x5764EAA05291) utils.cpp:189 - ComputeQuorumMembers 28#: (0x5764EAA05291) utils.cpp:167 - llmq::utils::GetAllQuorumMembers(Consensus::LLMQType, CDeterministicMNManager&, gsl::not_null, bool) 29#: (0x5764EA9906FC) stl_vector.h:1258 - std::vector, std::allocator > >::data() 30#: (0x5764EA9906FC) span.h:164 - Span >::Span, std::allocator > > >(std::vector, std::allocator > >&, std::enable_if<((!Span >::is_Span, std::allocator > > >::value)&&std::is_convertible, std::allocator > >&>)()).data)())>::type (*) [], std::shared_ptr (*) []>::value)&&std::is_convertible, std::allocator > >&>)()).size)()), unsigned long>::value, decltype(nullptr)>::type) 31#: (0x5764EA9906FC) quorums.cpp:412 - llmq::CQuorumManager::BuildQuorumFromCommitment(Consensus::LLMQType, gsl::not_null, bool) const 32#: (0x5764EA99198B) shared_ptr_base.h:1540 - std::__shared_ptr::__shared_ptr(std::__shared_ptr&&) 33#: (0x5764EA99198B) shared_ptr.h:369 - std::shared_ptr::shared_ptr(std::shared_ptr&&) 34#: (0x5764EA99198B) quorums.cpp:672 - llmq::CQuorumManager::GetQuorum(Consensus::LLMQType, gsl::not_null, bool) const 35#: (0x5764EA991BD6) shared_ptr_base.h:1070 - std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() 36#: (0x5764EA991BD6) shared_ptr_base.h:1524 - std::__shared_ptr::~__shared_ptr() 37#: (0x5764EA991BD6) shared_ptr.h:175 - std::shared_ptr::~shared_ptr() 38#: (0x5764EA991BD6) quorums.cpp:494 - llmq::CQuorumManager::RequestQuorumData(CNode*, Consensus::LLMQType, CBlockIndex const*, unsigned short, uint256 const&) const ... --- src/llmq/quorums.cpp | 20 ++++++++------------ src/llmq/quorums.h | 3 +-- src/rpc/quorums.cpp | 9 +++++---- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/llmq/quorums.cpp b/src/llmq/quorums.cpp index 85309f0bd3..67b2890b57 100644 --- a/src/llmq/quorums.cpp +++ b/src/llmq/quorums.cpp @@ -469,8 +469,7 @@ bool CQuorumManager::HasQuorum(Consensus::LLMQType llmqType, const CQuorumBlockP return quorum_block_processor.HasMinedCommitment(llmqType, quorumHash); } -bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, Consensus::LLMQType llmqType, - const CBlockIndex* pQuorumBaseBlockIndex, uint16_t nDataMask, +bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, CQuorumCPtr pQuorum, uint16_t nDataMask, const uint256& proTxHash) const { if (pfrom == nullptr) { @@ -485,22 +484,20 @@ bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, Consensu LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- pfrom is not a verified masternode\n", __func__); return false; } + const Consensus::LLMQType llmqType = pQuorum->qc->llmqType; if (!Params().GetLLMQ(llmqType).has_value()) { LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid llmqType: %d\n", __func__, ToUnderlying(llmqType)); return false; } - if (pQuorumBaseBlockIndex == nullptr) { - LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid pQuorumBaseBlockIndex: nullptr\n", __func__); - return false; - } - if (GetQuorum(llmqType, pQuorumBaseBlockIndex) == nullptr) { - LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Quorum not found: %s, %d\n", __func__, pQuorumBaseBlockIndex->GetBlockHash().ToString(), ToUnderlying(llmqType)); + const CBlockIndex* pindex{pQuorum->m_quorum_base_block_index}; + if (pindex == nullptr) { + LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid m_quorum_base_block_index : nullptr\n", __func__); return false; } LOCK(cs_data_requests); - const CQuorumDataRequestKey key(pfrom->GetVerifiedProRegTxHash(), true, pQuorumBaseBlockIndex->GetBlockHash(), llmqType); - const CQuorumDataRequest request(llmqType, pQuorumBaseBlockIndex->GetBlockHash(), nDataMask, proTxHash); + const CQuorumDataRequestKey key(pfrom->GetVerifiedProRegTxHash(), true, pindex->GetBlockHash(), llmqType); + const CQuorumDataRequest request(llmqType, pindex->GetBlockHash(), nDataMask, proTxHash); auto [old_pair, inserted] = mapQuorumDataRequests.emplace(key, request); if (!inserted) { if (old_pair->second.IsExpired(/*add_bias=*/true)) { @@ -1006,8 +1003,7 @@ void CQuorumManager::StartQuorumDataRecoveryThread(CConnman& connman, const CQuo return; } - if (RequestQuorumData(pNode, connman, pQuorum->qc->llmqType, pQuorum->m_quorum_base_block_index, - nDataMask, proTxHash)) { + if (RequestQuorumData(pNode, connman, pQuorum, nDataMask, proTxHash)) { nTimeLastSuccess = GetTime().count(); printLog("Requested"); } else { diff --git a/src/llmq/quorums.h b/src/llmq/quorums.h index 6e8db117db..645802efa6 100644 --- a/src/llmq/quorums.h +++ b/src/llmq/quorums.h @@ -277,8 +277,7 @@ public: static bool HasQuorum(Consensus::LLMQType llmqType, const CQuorumBlockProcessor& quorum_block_processor, const uint256& quorumHash); - bool RequestQuorumData(CNode* pfrom, CConnman& connman, Consensus::LLMQType llmqType, - const CBlockIndex* pQuorumBaseBlockIndex, uint16_t nDataMask, + bool RequestQuorumData(CNode* pfrom, CConnman& connman, const CQuorumCPtr pQuorum, uint16_t nDataMask, const uint256& proTxHash = uint256()) const; // all these methods will lock cs_main for a short period of time diff --git a/src/rpc/quorums.cpp b/src/rpc/quorums.cpp index 5289d027a2..e7da6e4a03 100644 --- a/src/rpc/quorums.cpp +++ b/src/rpc/quorums.cpp @@ -793,7 +793,6 @@ static RPCHelpMan quorum_getdata() [&](const RPCHelpMan& self, const JSONRPCRequest& request) -> UniValue { const NodeContext& node = EnsureAnyNodeContext(request.context); - const ChainstateManager& chainman = EnsureChainman(node); const LLMQContext& llmq_ctx = EnsureLLMQContext(node); CConnman& connman = EnsureConnman(node); @@ -815,10 +814,12 @@ static RPCHelpMan quorum_getdata() } } - const CBlockIndex* pQuorumBaseBlockIndex = WITH_LOCK(cs_main, return chainman.m_blockman.LookupBlockIndex(quorumHash)); - + const auto quorum = llmq_ctx.qman->GetQuorum(llmqType, quorumHash); + if (!quorum) { + throw JSONRPCError(RPC_INVALID_PARAMETER, "quorum not found"); + } return connman.ForNode(nodeId, [&](CNode* pNode) { - return llmq_ctx.qman->RequestQuorumData(pNode, connman, llmqType, pQuorumBaseBlockIndex, nDataMask, proTxHash); + return llmq_ctx.qman->RequestQuorumData(pNode, connman, quorum, nDataMask, proTxHash); }); }, };