From 6c07c2c80c17bcc18397b1a703d826e8214b515c Mon Sep 17 00:00:00 2001 From: MarcoFalke Date: Fri, 11 Dec 2020 10:19:44 +0100 Subject: [PATCH] Merge #19858: Periodically make block-relay connections and sync headers b3a515c0bec97633a76bec101af47c3c90c0b749 Clarify comments around outbound peer eviction (Suhas Daftuar) daffaf03fbede6c01287779e464379ee3acb005a Periodically make block-relay connections and sync headers (Suhas Daftuar) 3cc8a7a0f5fa183cd7f0cf5e56f16f9a9d1f2441 Use conn_type to identify block-relay peers, rather than m_tx_relay == nullptr (Suhas Daftuar) 91d61952a82af3e8887e8ae532ecc19d87fe9073 Simplify and clarify extra outbound peer counting (Suhas Daftuar) Pull request description: To make eclipse attacks more difficult, regularly initiate outbound connections and stay connected long enough to sync headers and potentially learn of new blocks. If we learn a new block, rotate out an existing block-relay peer in favor of the new peer. This augments the existing outbound peer rotation that exists -- currently we make new full-relay connections when our tip is stale, which we disconnect after waiting a small time to see if we learn a new block. As block-relay connections use minimal bandwidth, we can make these connections regularly and not just when our tip is stale. Like feeler connections, these connections are not aggressive; whenever our timer fires (once every 5 minutes on average), we'll try to initiate a new block-relay connection as described, but if we fail to connect we just wait for our timer to fire again before repeating with a new peer. ACKs for top commit: ariard: Code Review ACK b3a515c, only change since last time is dropping a useless `cs_main` taking. I manually tested a previous version of the PR, and not substantial change has been introduced since then which would alter behavior IMO. jonatack: Tested ACK b3a515c0bec97633a76bec101af47c3c90c0b749 over several weeks, though this change and behavior could benefit from test coverage and other follow-ups (refactoring, etc.) described in the review feedback. I did not verify the behavior of `m_start_extra_block_relay_peers` only being enabled after initial chain sync. Since my last review, one unneeded `cs_main` lock was removed. Tree-SHA512: 75fc6f8e8003e88e93f86b845caf2d30b8b9c0dbb0a6b8aabe4e24ea4f6327351f736a068a3b2720a8a581b789942a3a47f921e2afdb47e88bc50d078aa37b6f --- src/init.cpp | 2 +- src/net.cpp | 54 +++++++++++++++++++++++++++---- src/net.h | 17 +++++++++- src/net_processing.cpp | 67 +++++++++++++++++++++++++++++++++------ src/net_processing.h | 5 +++ src/test/fuzz/connman.cpp | 2 +- 6 files changed, 127 insertions(+), 20 deletions(-) diff --git a/src/init.cpp b/src/init.cpp index 8671b1f446..eac1715090 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -248,7 +248,7 @@ void PrepareShutdown(NodeContext& node) // using the other before destroying them. if (node.peerman) UnregisterValidationInterface(node.peerman.get()); // Follow the lock order requirements: - // * CheckForStaleTipAndEvictPeers locks cs_main before indirectly calling GetExtraOutboundCount + // * CheckForStaleTipAndEvictPeers locks cs_main before indirectly calling GetExtraFullOutboundCount // which locks cs_vNodes. // * ProcessMessage locks cs_main and g_cs_orphans before indirectly calling ForEachNode which // locks cs_vNodes. diff --git a/src/net.cpp b/src/net.cpp index 73ce7a69d8..0d21afffa2 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -2235,9 +2235,9 @@ void CConnman::SetTryNewOutboundPeer(bool flag) // Also exclude peers that haven't finished initial connection handshake yet // (so that we don't decide we're over our desired connection limit, and then // evict some peer that has finished the handshake) -int CConnman::GetExtraOutboundCount() +int CConnman::GetExtraFullOutboundCount() { - int nOutbound = 0; + int full_outbound_peers = 0; { LOCK(cs_vNodes); for (const CNode* pnode : vNodes) { @@ -2245,12 +2245,26 @@ int CConnman::GetExtraOutboundCount() if (pnode->m_masternode_connection) { continue; } - if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsOutboundOrBlockRelayConn() && !pnode->m_masternode_probe_connection) { - ++nOutbound; + if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsFullOutboundConn() && !pnode->m_masternode_probe_connection) { + ++full_outbound_peers; } } } - return std::max(nOutbound - m_max_outbound_full_relay - m_max_outbound_block_relay, 0); + return std::max(full_outbound_peers - m_max_outbound_full_relay, 0); +} + +int CConnman::GetExtraBlockRelayCount() +{ + int block_relay_peers = 0; + { + LOCK(cs_vNodes); + for (const CNode* pnode : vNodes) { + if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsBlockOnlyConn()) { + ++block_relay_peers; + } + } + } + return std::max(block_relay_peers - m_max_outbound_block_relay, 0); } void CConnman::ThreadOpenConnections(const std::vector connect) @@ -2282,6 +2296,7 @@ void CConnman::ThreadOpenConnections(const std::vector connect) // Minimum time before next feeler connection (in microseconds). int64_t nNextFeeler = PoissonNextSend(nStart*1000*1000, FEELER_INTERVAL); + int64_t nNextExtraBlockRelay = PoissonNextSend(nStart*1000*1000, EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL); while (!interruptNet) { ProcessAddrFetch(); @@ -2365,8 +2380,9 @@ void CConnman::ThreadOpenConnections(const std::vector connect) // until we hit our block-relay-only peer limit. // GetTryNewOutboundPeer() gets set when a stale tip is detected, so we // try opening an additional OUTBOUND_FULL_RELAY connection. If none of - // these conditions are met, check the nNextFeeler timer to decide if - // we should open a FEELER. + // these conditions are met, check to see if it's time to try an extra + // block-relay-only peer (to confirm our tip is current, see below) or the nNextFeeler + // timer to decide if we should open a FEELER. if (!m_anchors.empty() && (nOutboundBlockRelay < m_max_outbound_block_relay)) { conn_type = ConnectionType::BLOCK_RELAY; @@ -2377,6 +2393,30 @@ void CConnman::ThreadOpenConnections(const std::vector connect) conn_type = ConnectionType::BLOCK_RELAY; } else if (GetTryNewOutboundPeer()) { // OUTBOUND_FULL_RELAY + } else if (nTime > nNextExtraBlockRelay && m_start_extra_block_relay_peers) { + // Periodically connect to a peer (using regular outbound selection + // methodology from addrman) and stay connected long enough to sync + // headers, but not much else. + // + // Then disconnect the peer, if we haven't learned anything new. + // + // The idea is to make eclipse attacks very difficult to pull off, + // because every few minutes we're finding a new peer to learn headers + // from. + // + // This is similar to the logic for trying extra outbound (full-relay) + // peers, except: + // - we do this all the time on a poisson timer, rather than just when + // our tip is stale + // - we potentially disconnect our next-youngest block-relay-only peer, if our + // newest block-relay-only peer delivers a block more recently. + // See the eviction logic in net_processing.cpp. + // + // Because we can promote these connections to block-relay-only + // connections, they do not get their own ConnectionType enum + // (similar to how we deal with extra outbound peers). + nNextExtraBlockRelay = PoissonNextSend(nTime, EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL); + conn_type = ConnectionType::BLOCK_RELAY; } else if (nTime > nNextFeeler) { nNextFeeler = PoissonNextSend(nTime, FEELER_INTERVAL); conn_type = ConnectionType::FEELER; diff --git a/src/net.h b/src/net.h index 3eb8d23d41..e229505fcf 100644 --- a/src/net.h +++ b/src/net.h @@ -65,6 +65,8 @@ static const int WARNING_INTERVAL = 10 * 60; static const int FEELER_INTERVAL = 120; /** The maximum number of entries in an 'inv' protocol message */ static const unsigned int MAX_INV_SZ = 50000; +/** Run the extra block-relay-only connection loop once every 5 minutes. **/ +static const int EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL = 300; /** The maximum number of addresses from our addrman to return in response to a getaddr message. */ static constexpr size_t MAX_ADDR_TO_SEND = 1000; /** Maximum length of incoming protocol messages (no message over 3 MiB is currently acceptable). */ @@ -488,13 +490,20 @@ public: void SetTryNewOutboundPeer(bool flag); bool GetTryNewOutboundPeer(); + void StartExtraBlockRelayPeers() { + LogPrint(BCLog::NET, "net: enabling extra block-relay-only peers\n"); + m_start_extra_block_relay_peers = true; + } + // Return the number of outbound peers we have in excess of our target (eg, // if we previously called SetTryNewOutboundPeer(true), and have since set // to false, we may have extra peers that we wish to disconnect). This may // return a value less than (num_outbound_connections - num_outbound_slots) // in cases where some outbound connections are not yet fully connected, or // not yet fully disconnected. - int GetExtraOutboundCount(); + int GetExtraFullOutboundCount(); + // Count the number of block-relay-only peers we have over our limit. + int GetExtraBlockRelayCount(); bool AddNode(const std::string& node); bool RemoveAddedNode(const std::string& node); @@ -829,6 +838,12 @@ private: * This takes the place of a feeler connection */ std::atomic_bool m_try_another_outbound_peer; + /** flag for initiating extra block-relay-only peer connections. + * this should only be enabled after initial chain sync has occurred, + * as these connections are intended to be short-lived and low-bandwidth. + */ + std::atomic_bool m_start_extra_block_relay_peers{false}; + std::atomic m_next_send_inv_to_incoming{0}; /** diff --git a/src/net_processing.cpp b/src/net_processing.cpp index f4218af42e..bc9b2e398f 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3097,7 +3097,7 @@ void PeerManagerImpl::ProcessMessage( LogPrintf("New outbound peer connected: version: %d, blocks=%d, peer=%d%s (%s)\n", pfrom.nVersion.load(), pfrom.nStartingHeight, pfrom.GetId(), (fLogIPs ? strprintf(", peeraddr=%s", pfrom.addr.ToString()) : ""), - pfrom.RelayAddrsWithConn()? "full-relay" : "block-relay"); + pfrom.IsBlockOnlyConn()? "block-relay" : "full-relay"); } if (!pfrom.m_masternode_probe_connection) { @@ -4607,11 +4607,54 @@ void PeerManagerImpl::ConsiderEviction(CNode& pto, int64_t time_in_seconds) void PeerManagerImpl::EvictExtraOutboundPeers(int64_t time_in_seconds) { - // Check whether we have too many outbound peers - int extra_peers = m_connman.GetExtraOutboundCount(); - if (extra_peers > 0) { - // If we have more outbound peers than we target, disconnect one. - // Pick the outbound peer that least recently announced + // If we have any extra block-relay-only peers, disconnect the youngest unless + // it's given us a block -- in which case, compare with the second-youngest, and + // out of those two, disconnect the peer who least recently gave us a block. + // The youngest block-relay-only peer would be the extra peer we connected + // to temporarily in order to sync our tip; see net.cpp. + // Note that we use higher nodeid as a measure for most recent connection. + if (m_connman.GetExtraBlockRelayCount() > 0) { + std::pair youngest_peer{-1, 0}, next_youngest_peer{-1, 0}; + + m_connman.ForEachNode([&](CNode* pnode) { + if (!pnode->IsBlockOnlyConn() || pnode->fDisconnect) return; + if (pnode->GetId() > youngest_peer.first) { + next_youngest_peer = youngest_peer; + youngest_peer.first = pnode->GetId(); + youngest_peer.second = pnode->nLastBlockTime; + } + }); + NodeId to_disconnect = youngest_peer.first; + if (youngest_peer.second > next_youngest_peer.second) { + // Our newest block-relay-only peer gave us a block more recently; + // disconnect our second youngest. + to_disconnect = next_youngest_peer.first; + } + m_connman.ForNode(to_disconnect, [&](CNode* pnode) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { + AssertLockHeld(::cs_main); + // Make sure we're not getting a block right now, and that + // we've been connected long enough for this eviction to happen + // at all. + // Note that we only request blocks from a peer if we learn of a + // valid headers chain with at least as much work as our tip. + CNodeState *node_state = State(pnode->GetId()); + if (node_state == nullptr || + (time_in_seconds - pnode->nTimeConnected >= MINIMUM_CONNECT_TIME && node_state->nBlocksInFlight == 0)) { + pnode->fDisconnect = true; + LogPrint(BCLog::NET, "disconnecting extra block-relay-only peer=%d (last block received at time %d)\n", pnode->GetId(), pnode->nLastBlockTime); + return true; + } else { + LogPrint(BCLog::NET, "keeping block-relay-only peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", + pnode->GetId(), pnode->nTimeConnected, node_state->nBlocksInFlight); + } + return false; + }); + } + + // Check whether we have too many OUTBOUND_FULL_RELAY peers + if (m_connman.GetExtraFullOutboundCount() > 0) { + // If we have more OUTBOUND_FULL_RELAY peers than we target, disconnect one. + // Pick the OUTBOUND_FULL_RELAY peer that least recently announced // us a new block, with ties broken by choosing the more recent // connection (higher node id) NodeId worst_peer = -1; @@ -4622,14 +4665,13 @@ void PeerManagerImpl::EvictExtraOutboundPeers(int64_t time_in_seconds) // Don't disconnect masternodes just because they were slow in block announcement if (pnode->m_masternode_connection) return; - // Ignore non-outbound peers, or nodes marked for disconnect already - if (!pnode->IsOutboundOrBlockRelayConn() || pnode->fDisconnect) return; + // Only consider OUTBOUND_FULL_RELAY peers that are not already + // marked for disconnection + if (!pnode->IsFullOutboundConn() || pnode->fDisconnect) return; CNodeState *state = State(pnode->GetId()); if (state == nullptr) return; // shouldn't be possible, but just in case // Don't evict our protected peers if (state->m_chain_sync.m_protect) return; - // Don't evict our block-relay-only peers. - if (!pnode->RelayAddrsWithConn()) return; if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) { worst_peer = pnode->GetId(); oldest_block_announcement = state->m_last_block_announcement; @@ -4685,6 +4727,11 @@ void PeerManagerImpl::CheckForStaleTipAndEvictPeers() } m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL; } + + if (!m_initial_sync_finished && CanDirectFetch(m_chainparams.GetConsensus())) { + m_connman.StartExtraBlockRelayPeers(); + m_initial_sync_finished = true; + } } namespace { diff --git a/src/net_processing.h b/src/net_processing.h index 266514b230..d62ed641ba 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -75,6 +75,11 @@ public: int64_t nTimeReceived, const std::atomic& interruptMsgProc) = 0; virtual bool IsBanned(NodeId pnode) = 0; + + /** Whether we've completed initial sync yet, for determining when to turn + * on extra block-relay-only peers. */ + bool m_initial_sync_finished{false}; + }; #endif // BITCOIN_NET_PROCESSING_H diff --git a/src/test/fuzz/connman.cpp b/src/test/fuzz/connman.cpp index 8bf1af550f..b26d4460c2 100644 --- a/src/test/fuzz/connman.cpp +++ b/src/test/fuzz/connman.cpp @@ -117,7 +117,7 @@ FUZZ_TARGET_INIT(connman, initialize_connman) }); } (void)connman.GetAddedNodeInfo(); - (void)connman.GetExtraOutboundCount(); + (void)connman.GetExtraFullOutboundCount(); (void)connman.GetLocalServices(); (void)connman.GetMaxOutboundTarget(); (void)connman.GetMaxOutboundTimeframe();