Merge bitcoin/bitcoin#22577: Close minor startup race between main and scheduler threads

703b1e612a4bd4521e20ae21eb8fb7c19f4ef942 Close minor startup race between main and scheduler threads (Larry Ruane)

Pull request description:

  This is a low-priority bug fix. The scheduler thread runs `CheckForStaleTipAndEvictPeers()` every 45 seconds (EXTRA_PEER_CHECK_INTERVAL). If its first run happens before the active chain is set up (`CChain::SetTip()`), `bitcoind` will assert:
  ```
  (...)
  2021-07-28T22:16:49Z init message: Loading block index…
  bitcoind: validation.cpp:4968: CChainState& ChainstateManager::ActiveChainstate() const: Assertion `m_active_chainstate' failed.
  Aborted (core dumped)
  ```
  I ran into this while using the debugger to investigate an unrelated problem. Single-stepping through threads with a debugger can cause the relative thread execution timing to be very different than usual. I don't think any automated tests are needed for this PR. I'll give reproduction steps in the next PR comment.

ACKs for top commit:
  MarcoFalke:
    cr ACK 703b1e612a4bd4521e20ae21eb8fb7c19f4ef942
  tryphe:
    tested ACK 703b1e612a4bd4521e20ae21eb8fb7c19f4ef942
  0xB10C:
    ACK 703b1e612a4bd4521e20ae21eb8fb7c19f4ef942
  glozow:
    code review ACK 703b1e612a4bd4521e20ae21eb8fb7c19f4ef942 - it makes sense to me to start peerman's background tasks here, after `chainstate->LoadChainTip()` and `node.connman->Start()` have been called.

Tree-SHA512: 9316ad768cba3b171f62e2eb400e3790af66c47d1886d7965edb38d9710fc8c8f8e4fb38232811c9346732ce311d39f740c5c2aaf5f6ca390ddc48c51a8d633b
This commit is contained in:
MarcoFalke 2021-08-04 16:37:01 +02:00 committed by pasta
parent 4be68dd34a
commit 5057ebb4f1
No known key found for this signature in database
GPG Key ID: 52527BEDABE87984
5 changed files with 22 additions and 12 deletions

View File

@ -1637,7 +1637,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
assert(!node.peerman);
node.peerman = PeerManager::make(chainparams, *node.connman, *node.addrman, node.banman.get(),
*node.scheduler, chainman, *node.mempool, *node.mn_metaman, *node.mn_sync,
chainman, *node.mempool, *node.mn_metaman, *node.mn_sync,
*node.govman, *node.sporkman, node.mn_activeman.get(), node.dmnman,
node.cj_ctx, node.llmq_ctx, ignores_incoming_txs);
RegisterValidationInterface(node.peerman.get());
@ -2467,6 +2467,8 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
banman->DumpBanlist();
}, DUMP_BANS_INTERVAL);
if (node.peerman) node.peerman->StartScheduledTasks(*node.scheduler);
#if HAVE_SYSTEM
StartupNotify(args);
#endif

View File

@ -368,7 +368,7 @@ class PeerManagerImpl final : public PeerManager
{
public:
PeerManagerImpl(const CChainParams& chainparams, CConnman& connman, AddrMan& addrman, BanMan* banman,
CScheduler &scheduler, ChainstateManager& chainman, CTxMemPool& pool,
ChainstateManager& chainman, CTxMemPool& pool,
CMasternodeMetaMan& mn_metaman, CMasternodeSync& mn_sync,
CGovernanceManager& govman, CSporkManager& sporkman,
const CActiveMasternodeManager* const mn_activeman,
@ -397,6 +397,7 @@ public:
EXCLUSIVE_LOCKS_REQUIRED(!m_peer_mutex, !m_recent_confirmed_transactions_mutex);
/** Implement PeerManager */
void StartScheduledTasks(CScheduler& scheduler) override;
void CheckForStaleTipAndEvictPeers() override;
std::optional<std::string> FetchBlock(NodeId peer_id, const CBlockIndex& block_index) override;
bool GetNodeStateStats(NodeId nodeid, CNodeStateStats& stats) const override EXCLUSIVE_LOCKS_REQUIRED(!m_peer_mutex);
@ -1893,7 +1894,7 @@ std::optional<std::string> PeerManagerImpl::FetchBlock(NodeId peer_id, const CBl
}
std::unique_ptr<PeerManager> PeerManager::make(const CChainParams& chainparams, CConnman& connman, AddrMan& addrman, BanMan* banman,
CScheduler &scheduler, ChainstateManager& chainman, CTxMemPool& pool,
ChainstateManager& chainman, CTxMemPool& pool,
CMasternodeMetaMan& mn_metaman, CMasternodeSync& mn_sync,
CGovernanceManager& govman, CSporkManager& sporkman,
const CActiveMasternodeManager* const mn_activeman,
@ -1901,11 +1902,11 @@ std::unique_ptr<PeerManager> PeerManager::make(const CChainParams& chainparams,
const std::unique_ptr<CJContext>& cj_ctx,
const std::unique_ptr<LLMQContext>& llmq_ctx, bool ignore_incoming_txs)
{
return std::make_unique<PeerManagerImpl>(chainparams, connman, addrman, banman, scheduler, chainman, pool, mn_metaman, mn_sync, govman, sporkman, mn_activeman, dmnman, cj_ctx, llmq_ctx, ignore_incoming_txs);
return std::make_unique<PeerManagerImpl>(chainparams, connman, addrman, banman, chainman, pool, mn_metaman, mn_sync, govman, sporkman, mn_activeman, dmnman, cj_ctx, llmq_ctx, ignore_incoming_txs);
}
PeerManagerImpl::PeerManagerImpl(const CChainParams& chainparams, CConnman& connman, AddrMan& addrman, BanMan* banman,
CScheduler &scheduler, ChainstateManager& chainman, CTxMemPool& pool,
ChainstateManager& chainman, CTxMemPool& pool,
CMasternodeMetaMan& mn_metaman, CMasternodeSync& mn_sync,
CGovernanceManager& govman, CSporkManager& sporkman,
const CActiveMasternodeManager* const mn_activeman,
@ -1928,6 +1929,10 @@ PeerManagerImpl::PeerManagerImpl(const CChainParams& chainparams, CConnman& conn
m_sporkman(sporkman),
m_mn_activeman(mn_activeman),
m_ignore_incoming_txs(ignore_incoming_txs)
{
}
void PeerManagerImpl::StartScheduledTasks(CScheduler& scheduler)
{
// Stale tip checking and peer eviction are on two different timers, but we
// don't want them to get out of sync due to drift in the scheduler, so we

View File

@ -57,7 +57,7 @@ class PeerManager : public CValidationInterface, public NetEventsInterface
{
public:
static std::unique_ptr<PeerManager> make(const CChainParams& chainparams, CConnman& connman, AddrMan& addrman,
BanMan* banman, CScheduler &scheduler, ChainstateManager& chainman,
BanMan* banman, ChainstateManager& chainman,
CTxMemPool& pool, CMasternodeMetaMan& mn_metaman, CMasternodeSync& mn_sync,
CGovernanceManager& govman, CSporkManager& sporkman,
const CActiveMasternodeManager* const mn_activeman,
@ -75,6 +75,9 @@ public:
*/
virtual std::optional<std::string> FetchBlock(NodeId peer_id, const CBlockIndex& block_index) = 0;
/** Begin running background tasks, should only be called once */
virtual void StartScheduledTasks(CScheduler& scheduler) = 0;
/** Get statistics from node state */
virtual bool GetNodeStateStats(NodeId nodeid, CNodeStateStats& stats) const = 0;

View File

@ -62,7 +62,7 @@ BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction)
auto connman = std::make_unique<CConnman>(0x1337, 0x1337, *m_node.addrman);
// Disable inactivity checks for this test to avoid interference
static_cast<ConnmanTestMsg*>(connman.get())->SetPeerConnectTimeout(99999s);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr, *m_node.scheduler,
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr,
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);
@ -153,7 +153,7 @@ BOOST_AUTO_TEST_CASE(stale_tip_peer_management)
NodeId id{0};
const CChainParams& chainparams = Params();
auto connman = std::make_unique<ConnmanTestMsg>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr, *m_node.scheduler,
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr,
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);
@ -233,7 +233,7 @@ BOOST_AUTO_TEST_CASE(block_relay_only_eviction)
NodeId id{0};
const CChainParams& chainparams = Params();
auto connman = std::make_unique<ConnmanTestMsg>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr, *m_node.scheduler,
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr,
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);
@ -298,7 +298,7 @@ BOOST_AUTO_TEST_CASE(peer_discouragement)
const CChainParams& chainparams = Params();
auto banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
auto connman = std::make_unique<ConnmanTestMsg>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(), *m_node.scheduler,
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(),
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);
@ -416,7 +416,7 @@ BOOST_AUTO_TEST_CASE(DoS_bantime)
const CChainParams& chainparams = Params();
auto banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
auto connman = std::make_unique<CConnman>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(), *m_node.scheduler,
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(),
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);

View File

@ -281,7 +281,7 @@ TestingSetup::TestingSetup(const std::string& chainName, const std::vector<const
m_node.banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
m_node.peerman = PeerManager::make(chainparams, *m_node.connman, *m_node.addrman, m_node.banman.get(),
*m_node.scheduler, *m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.chainman, *m_node.mempool, *m_node.mn_metaman, *m_node.mn_sync,
*m_node.govman, *m_node.sporkman, /* mn_activeman = */ nullptr, m_node.dmnman,
m_node.cj_ctx, m_node.llmq_ctx, /* ignore_incoming_txs = */ false);
{