Merge #6470: fix: wrong lock order, observed locally on top of #6467 with custom changes

1b0af99fd5 fix: wrong lock order, observed locally on top of #6467 with custom changes (Konstantin Akimov)

Pull request description:

  ## Issue being fixed or feature implemented

      Assertion failed: detected inconsistent lock order for 'cs_main' in node/blockstorage.cpp:775 (in thread 'httpworker.2'), details in debug log.
         Previous lock order was:
          (2) 'cs_main' in rpc/net.cpp:666 (in thread 'httpworker.3')
          (1) 'm_nodes_mutex' in net.cpp:4754 (in thread 'httpworker.3')
         Current lock order is:
          (1) 'm_nodes_mutex' in net.cpp:5102 (in thread 'httpworker.2')
          (2) 'cs_main' in node/blockstorage.cpp:775 (in thread 'httpworker.2')
      node0 2024-12-09T07:46:49.907123Z (mocktime: 2014-12-04T18:34:20Z) [httpworker.2] [stacktraces.cpp:528] [PrintCrashInfo] Posix Signal: Aborted

      ...
       15#: (0x5764EB18DE42) sync.cpp:123           - potential_deadlock_detected
       16#: (0x5764EB194A7E) sync.cpp:190           - push_lock<std::recursive_mutex>
       17#: (0x5764EB194A7E) sync.cpp:214           - void EnterCritical<std::recursive_mutex>(char const*, char const*, int, std::recursive_mutex*, bool)
       18#: (0x5764EA928642) unique_lock.h:150      - std::unique_lock<std::recursive_mutex>::try_lock()
       19#: (0x5764EA928642) sync.h:168             - UniqueLock<AnnotatedMixin<std::recursive_mutex>, std::unique_lock<std::recursive_mutex> >::Enter(char const*, char const*, int)
       20#: (0x5764EA928642) sync.h:190             - UniqueLock<AnnotatedMixin<std::recursive_mutex>, std::unique_lock<std::recursive_mutex> >::UniqueLock(AnnotatedMixin<std::recursive_mutex>&, char const*, char const*, int, bool)
       21#: (0x5764EAAD9C3D) chain.h:214            - CBlockIndex::GetBlockPos() const
       22#: (0x5764EAAD9C3D) blockstorage.cpp:775   - operator()
       23#: (0x5764EAAD9C3D) blockstorage.cpp:775   - ReadBlockFromDisk(CBlock&, CBlockIndex const*, Consensus::Params const&)
       24#: (0x5764EADB117A) stl_vector.h:1126      - std::vector<std::shared_ptr<CTransaction const>, std::allocator<std::shared_ptr<CTransaction const> > >::operator[](unsigned long)
       25#: (0x5764EADB117A) cbtx.cpp:467           - GetNonNullCoinbaseChainlock(CBlockIndex const*)
       26#: (0x5764EA9FE4F8) utils.cpp:84           - GetHashModifier
       27#: (0x5764EAA05291) utils.cpp:189          - ComputeQuorumMembers
       28#: (0x5764EAA05291) utils.cpp:167          - llmq::utils::GetAllQuorumMembers(Consensus::LLMQType, CDeterministicMNManager&, gsl::not_null<CBlockIndex const*>, bool)
       29#: (0x5764EA9906FC) stl_vector.h:1258      - std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > >::data()
       30#: (0x5764EA9906FC) span.h:164             - Span<std::shared_ptr<CDeterministicMN const> >::Span<std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > > >(std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > >&, std::enable_if<((!Span<std::shared_ptr<CDeterministicMN const> >::is_Span<std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > > >::value)&&std::is_convertible<std::remove_pointer<decltype ((((declval<std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > >&>)()).data)())>::type (*) [], std::shared_ptr<CDeterministicMN const> (*) []>::value)&&std::is_convertible<decltype ((((declval<std::vector<std::shared_ptr<CDeterministicMN const>, std::allocator<std::shared_ptr<CDeterministicMN const> > >&>)()).size)()), unsigned long>::value, decltype(nullptr)>::type)
       31#: (0x5764EA9906FC) quorums.cpp:412        - llmq::CQuorumManager::BuildQuorumFromCommitment(Consensus::LLMQType, gsl::not_null<CBlockIndex const*>, bool) const
       32#: (0x5764EA99198B) shared_ptr_base.h:1540 - std::__shared_ptr<llmq::CQuorum const, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<llmq::CQuorum, void>(std::__shared_ptr<llmq::CQuorum, (__gnu_cxx::_Lock_policy)2>&&)
       33#: (0x5764EA99198B) shared_ptr.h:369       - std::shared_ptr<llmq::CQuorum const>::shared_ptr<llmq::CQuorum, void>(std::shared_ptr<llmq::CQuorum>&&)
       34#: (0x5764EA99198B) quorums.cpp:672        - llmq::CQuorumManager::GetQuorum(Consensus::LLMQType, gsl::not_null<CBlockIndex const*>, bool) const
       35#: (0x5764EA991BD6) shared_ptr_base.h:1070 - std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count()
       36#: (0x5764EA991BD6) shared_ptr_base.h:1524 - std::__shared_ptr<llmq::CQuorum const, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr()
       37#: (0x5764EA991BD6) shared_ptr.h:175       - std::shared_ptr<llmq::CQuorum const>::~shared_ptr()
       38#: (0x5764EA991BD6) quorums.cpp:494        - llmq::CQuorumManager::RequestQuorumData(CNode*, Consensus::LLMQType, CBlockIndex const*, unsigned short, uint256 const&) const
       ...

  ## What was done?
  Refactored call of GetQuorum out of `llmq::CQuorumManager::RequestQuorumData`. It also optimize `CQuorumManager::StartQuorumDataRecoveryThread` because avoid double call of `GetQuorum`

  ## How Has This Been Tested?
  Run with and without this changes.

  ## Breaking Changes
  You may observe error `RPC_INVALID_PARAMETER, "quorum not found"` while calling RPC `quorum getdata` instead returning `false` with log output only in case of request for non-existing quorum.
  The output is not documented at the moment anyway.

  ## Checklist:
  - [x] I have performed a self-review of my own code
  - [ ] I have commented my code, particularly in hard-to-understand areas
  - [ ] I have added or updated relevant unit/integration/functional/e2e tests
  - [ ] I have made corresponding changes to the documentation
  - [x] I have assigned this pull request to a milestone

ACKs for top commit:
  UdjinM6:
    utACK 1b0af99fd5
  PastaPastaPasta:
    utACK 1b0af99fd5

Tree-SHA512: b1955c7a2caad81f6c68299df513b4b83ff43f1d829d91769ac7d2a7b88985b5e7a86b765cfe90739ced9bac97da8fea23cd5c87cde1fca039d8b3f1a9c91084
This commit is contained in:
pasta 2024-12-10 22:28:58 -06:00
commit 6d97441434
No known key found for this signature in database
GPG Key ID: E2F3D7916E722D38
3 changed files with 14 additions and 18 deletions

View File

@ -469,8 +469,7 @@ bool CQuorumManager::HasQuorum(Consensus::LLMQType llmqType, const CQuorumBlockP
return quorum_block_processor.HasMinedCommitment(llmqType, quorumHash);
}
bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, Consensus::LLMQType llmqType,
const CBlockIndex* pQuorumBaseBlockIndex, uint16_t nDataMask,
bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, CQuorumCPtr pQuorum, uint16_t nDataMask,
const uint256& proTxHash) const
{
if (pfrom == nullptr) {
@ -485,22 +484,20 @@ bool CQuorumManager::RequestQuorumData(CNode* pfrom, CConnman& connman, Consensu
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- pfrom is not a verified masternode\n", __func__);
return false;
}
const Consensus::LLMQType llmqType = pQuorum->qc->llmqType;
if (!Params().GetLLMQ(llmqType).has_value()) {
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid llmqType: %d\n", __func__, ToUnderlying(llmqType));
return false;
}
if (pQuorumBaseBlockIndex == nullptr) {
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid pQuorumBaseBlockIndex: nullptr\n", __func__);
return false;
}
if (GetQuorum(llmqType, pQuorumBaseBlockIndex) == nullptr) {
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Quorum not found: %s, %d\n", __func__, pQuorumBaseBlockIndex->GetBlockHash().ToString(), ToUnderlying(llmqType));
const CBlockIndex* pindex{pQuorum->m_quorum_base_block_index};
if (pindex == nullptr) {
LogPrint(BCLog::LLMQ, "CQuorumManager::%s -- Invalid m_quorum_base_block_index : nullptr\n", __func__);
return false;
}
LOCK(cs_data_requests);
const CQuorumDataRequestKey key(pfrom->GetVerifiedProRegTxHash(), true, pQuorumBaseBlockIndex->GetBlockHash(), llmqType);
const CQuorumDataRequest request(llmqType, pQuorumBaseBlockIndex->GetBlockHash(), nDataMask, proTxHash);
const CQuorumDataRequestKey key(pfrom->GetVerifiedProRegTxHash(), true, pindex->GetBlockHash(), llmqType);
const CQuorumDataRequest request(llmqType, pindex->GetBlockHash(), nDataMask, proTxHash);
auto [old_pair, inserted] = mapQuorumDataRequests.emplace(key, request);
if (!inserted) {
if (old_pair->second.IsExpired(/*add_bias=*/true)) {
@ -1006,8 +1003,7 @@ void CQuorumManager::StartQuorumDataRecoveryThread(CConnman& connman, const CQuo
return;
}
if (RequestQuorumData(pNode, connman, pQuorum->qc->llmqType, pQuorum->m_quorum_base_block_index,
nDataMask, proTxHash)) {
if (RequestQuorumData(pNode, connman, pQuorum, nDataMask, proTxHash)) {
nTimeLastSuccess = GetTime<std::chrono::seconds>().count();
printLog("Requested");
} else {

View File

@ -277,8 +277,7 @@ public:
static bool HasQuorum(Consensus::LLMQType llmqType, const CQuorumBlockProcessor& quorum_block_processor, const uint256& quorumHash);
bool RequestQuorumData(CNode* pfrom, CConnman& connman, Consensus::LLMQType llmqType,
const CBlockIndex* pQuorumBaseBlockIndex, uint16_t nDataMask,
bool RequestQuorumData(CNode* pfrom, CConnman& connman, const CQuorumCPtr pQuorum, uint16_t nDataMask,
const uint256& proTxHash = uint256()) const;
// all these methods will lock cs_main for a short period of time

View File

@ -793,7 +793,6 @@ static RPCHelpMan quorum_getdata()
[&](const RPCHelpMan& self, const JSONRPCRequest& request) -> UniValue
{
const NodeContext& node = EnsureAnyNodeContext(request.context);
const ChainstateManager& chainman = EnsureChainman(node);
const LLMQContext& llmq_ctx = EnsureLLMQContext(node);
CConnman& connman = EnsureConnman(node);
@ -815,10 +814,12 @@ static RPCHelpMan quorum_getdata()
}
}
const CBlockIndex* pQuorumBaseBlockIndex = WITH_LOCK(cs_main, return chainman.m_blockman.LookupBlockIndex(quorumHash));
const auto quorum = llmq_ctx.qman->GetQuorum(llmqType, quorumHash);
if (!quorum) {
throw JSONRPCError(RPC_INVALID_PARAMETER, "quorum not found");
}
return connman.ForNode(nodeId, [&](CNode* pNode) {
return llmq_ctx.qman->RequestQuorumData(pNode, connman, llmqType, pQuorumBaseBlockIndex, nDataMask, proTxHash);
return llmq_ctx.qman->RequestQuorumData(pNode, connman, quorum, nDataMask, proTxHash);
});
},
};