Merge #19858: Periodically make block-relay connections and sync headers

b3a515c0bec97633a76bec101af47c3c90c0b749 Clarify comments around outbound peer eviction (Suhas Daftuar)
daffaf03fbede6c01287779e464379ee3acb005a Periodically make block-relay connections and sync headers (Suhas Daftuar)
3cc8a7a0f5fa183cd7f0cf5e56f16f9a9d1f2441 Use conn_type to identify block-relay peers, rather than m_tx_relay == nullptr (Suhas Daftuar)
91d61952a82af3e8887e8ae532ecc19d87fe9073 Simplify and clarify extra outbound peer counting (Suhas Daftuar)

Pull request description:

  To make eclipse attacks more difficult, regularly initiate outbound connections
  and stay connected long enough to sync headers and potentially learn of new
  blocks. If we learn a new block, rotate out an existing block-relay peer in
  favor of the new peer.

  This augments the existing outbound peer rotation that exists -- currently we
  make new full-relay connections when our tip is stale, which we disconnect
  after waiting a small time to see if we learn a new block.  As block-relay
  connections use minimal bandwidth, we can make these connections regularly and
  not just when our tip is stale.

  Like feeler connections, these connections are not aggressive; whenever our
  timer fires (once every 5 minutes on average), we'll try to initiate a new
  block-relay connection as described, but if we fail to connect we just wait for
  our timer to fire again before repeating with a new peer.

ACKs for top commit:
  ariard:
    Code Review ACK b3a515c, only change since last time is dropping a useless `cs_main` taking. I manually tested a previous version of the PR, and not substantial change has been introduced since then which would alter behavior IMO.
  jonatack:
    Tested ACK b3a515c0bec97633a76bec101af47c3c90c0b749 over several weeks, though this change and behavior could benefit from test coverage and other follow-ups (refactoring, etc.) described in the review feedback. I did not verify the behavior of `m_start_extra_block_relay_peers` only being enabled after initial chain sync. Since my last review, one unneeded `cs_main` lock was removed.

Tree-SHA512: 75fc6f8e8003e88e93f86b845caf2d30b8b9c0dbb0a6b8aabe4e24ea4f6327351f736a068a3b2720a8a581b789942a3a47f921e2afdb47e88bc50d078aa37b6f
This commit is contained in:
MarcoFalke 2020-12-11 10:19:44 +01:00 committed by pasta
parent 5e23f3506c
commit 6c07c2c80c
No known key found for this signature in database
GPG Key ID: 52527BEDABE87984
6 changed files with 127 additions and 20 deletions

View File

@ -248,7 +248,7 @@ void PrepareShutdown(NodeContext& node)
// using the other before destroying them.
if (node.peerman) UnregisterValidationInterface(node.peerman.get());
// Follow the lock order requirements:
// * CheckForStaleTipAndEvictPeers locks cs_main before indirectly calling GetExtraOutboundCount
// * CheckForStaleTipAndEvictPeers locks cs_main before indirectly calling GetExtraFullOutboundCount
// which locks cs_vNodes.
// * ProcessMessage locks cs_main and g_cs_orphans before indirectly calling ForEachNode which
// locks cs_vNodes.

View File

@ -2235,9 +2235,9 @@ void CConnman::SetTryNewOutboundPeer(bool flag)
// Also exclude peers that haven't finished initial connection handshake yet
// (so that we don't decide we're over our desired connection limit, and then
// evict some peer that has finished the handshake)
int CConnman::GetExtraOutboundCount()
int CConnman::GetExtraFullOutboundCount()
{
int nOutbound = 0;
int full_outbound_peers = 0;
{
LOCK(cs_vNodes);
for (const CNode* pnode : vNodes) {
@ -2245,12 +2245,26 @@ int CConnman::GetExtraOutboundCount()
if (pnode->m_masternode_connection) {
continue;
}
if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsOutboundOrBlockRelayConn() && !pnode->m_masternode_probe_connection) {
++nOutbound;
if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsFullOutboundConn() && !pnode->m_masternode_probe_connection) {
++full_outbound_peers;
}
}
}
return std::max(nOutbound - m_max_outbound_full_relay - m_max_outbound_block_relay, 0);
return std::max(full_outbound_peers - m_max_outbound_full_relay, 0);
}
int CConnman::GetExtraBlockRelayCount()
{
int block_relay_peers = 0;
{
LOCK(cs_vNodes);
for (const CNode* pnode : vNodes) {
if (pnode->fSuccessfullyConnected && !pnode->fDisconnect && pnode->IsBlockOnlyConn()) {
++block_relay_peers;
}
}
}
return std::max(block_relay_peers - m_max_outbound_block_relay, 0);
}
void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
@ -2282,6 +2296,7 @@ void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
// Minimum time before next feeler connection (in microseconds).
int64_t nNextFeeler = PoissonNextSend(nStart*1000*1000, FEELER_INTERVAL);
int64_t nNextExtraBlockRelay = PoissonNextSend(nStart*1000*1000, EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL);
while (!interruptNet)
{
ProcessAddrFetch();
@ -2365,8 +2380,9 @@ void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
// until we hit our block-relay-only peer limit.
// GetTryNewOutboundPeer() gets set when a stale tip is detected, so we
// try opening an additional OUTBOUND_FULL_RELAY connection. If none of
// these conditions are met, check the nNextFeeler timer to decide if
// we should open a FEELER.
// these conditions are met, check to see if it's time to try an extra
// block-relay-only peer (to confirm our tip is current, see below) or the nNextFeeler
// timer to decide if we should open a FEELER.
if (!m_anchors.empty() && (nOutboundBlockRelay < m_max_outbound_block_relay)) {
conn_type = ConnectionType::BLOCK_RELAY;
@ -2377,6 +2393,30 @@ void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
conn_type = ConnectionType::BLOCK_RELAY;
} else if (GetTryNewOutboundPeer()) {
// OUTBOUND_FULL_RELAY
} else if (nTime > nNextExtraBlockRelay && m_start_extra_block_relay_peers) {
// Periodically connect to a peer (using regular outbound selection
// methodology from addrman) and stay connected long enough to sync
// headers, but not much else.
//
// Then disconnect the peer, if we haven't learned anything new.
//
// The idea is to make eclipse attacks very difficult to pull off,
// because every few minutes we're finding a new peer to learn headers
// from.
//
// This is similar to the logic for trying extra outbound (full-relay)
// peers, except:
// - we do this all the time on a poisson timer, rather than just when
// our tip is stale
// - we potentially disconnect our next-youngest block-relay-only peer, if our
// newest block-relay-only peer delivers a block more recently.
// See the eviction logic in net_processing.cpp.
//
// Because we can promote these connections to block-relay-only
// connections, they do not get their own ConnectionType enum
// (similar to how we deal with extra outbound peers).
nNextExtraBlockRelay = PoissonNextSend(nTime, EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL);
conn_type = ConnectionType::BLOCK_RELAY;
} else if (nTime > nNextFeeler) {
nNextFeeler = PoissonNextSend(nTime, FEELER_INTERVAL);
conn_type = ConnectionType::FEELER;

View File

@ -65,6 +65,8 @@ static const int WARNING_INTERVAL = 10 * 60;
static const int FEELER_INTERVAL = 120;
/** The maximum number of entries in an 'inv' protocol message */
static const unsigned int MAX_INV_SZ = 50000;
/** Run the extra block-relay-only connection loop once every 5 minutes. **/
static const int EXTRA_BLOCK_RELAY_ONLY_PEER_INTERVAL = 300;
/** The maximum number of addresses from our addrman to return in response to a getaddr message. */
static constexpr size_t MAX_ADDR_TO_SEND = 1000;
/** Maximum length of incoming protocol messages (no message over 3 MiB is currently acceptable). */
@ -488,13 +490,20 @@ public:
void SetTryNewOutboundPeer(bool flag);
bool GetTryNewOutboundPeer();
void StartExtraBlockRelayPeers() {
LogPrint(BCLog::NET, "net: enabling extra block-relay-only peers\n");
m_start_extra_block_relay_peers = true;
}
// Return the number of outbound peers we have in excess of our target (eg,
// if we previously called SetTryNewOutboundPeer(true), and have since set
// to false, we may have extra peers that we wish to disconnect). This may
// return a value less than (num_outbound_connections - num_outbound_slots)
// in cases where some outbound connections are not yet fully connected, or
// not yet fully disconnected.
int GetExtraOutboundCount();
int GetExtraFullOutboundCount();
// Count the number of block-relay-only peers we have over our limit.
int GetExtraBlockRelayCount();
bool AddNode(const std::string& node);
bool RemoveAddedNode(const std::string& node);
@ -829,6 +838,12 @@ private:
* This takes the place of a feeler connection */
std::atomic_bool m_try_another_outbound_peer;
/** flag for initiating extra block-relay-only peer connections.
* this should only be enabled after initial chain sync has occurred,
* as these connections are intended to be short-lived and low-bandwidth.
*/
std::atomic_bool m_start_extra_block_relay_peers{false};
std::atomic<int64_t> m_next_send_inv_to_incoming{0};
/**

View File

@ -3097,7 +3097,7 @@ void PeerManagerImpl::ProcessMessage(
LogPrintf("New outbound peer connected: version: %d, blocks=%d, peer=%d%s (%s)\n",
pfrom.nVersion.load(), pfrom.nStartingHeight,
pfrom.GetId(), (fLogIPs ? strprintf(", peeraddr=%s", pfrom.addr.ToString()) : ""),
pfrom.RelayAddrsWithConn()? "full-relay" : "block-relay");
pfrom.IsBlockOnlyConn()? "block-relay" : "full-relay");
}
if (!pfrom.m_masternode_probe_connection) {
@ -4607,11 +4607,54 @@ void PeerManagerImpl::ConsiderEviction(CNode& pto, int64_t time_in_seconds)
void PeerManagerImpl::EvictExtraOutboundPeers(int64_t time_in_seconds)
{
// Check whether we have too many outbound peers
int extra_peers = m_connman.GetExtraOutboundCount();
if (extra_peers > 0) {
// If we have more outbound peers than we target, disconnect one.
// Pick the outbound peer that least recently announced
// If we have any extra block-relay-only peers, disconnect the youngest unless
// it's given us a block -- in which case, compare with the second-youngest, and
// out of those two, disconnect the peer who least recently gave us a block.
// The youngest block-relay-only peer would be the extra peer we connected
// to temporarily in order to sync our tip; see net.cpp.
// Note that we use higher nodeid as a measure for most recent connection.
if (m_connman.GetExtraBlockRelayCount() > 0) {
std::pair<NodeId, int64_t> youngest_peer{-1, 0}, next_youngest_peer{-1, 0};
m_connman.ForEachNode([&](CNode* pnode) {
if (!pnode->IsBlockOnlyConn() || pnode->fDisconnect) return;
if (pnode->GetId() > youngest_peer.first) {
next_youngest_peer = youngest_peer;
youngest_peer.first = pnode->GetId();
youngest_peer.second = pnode->nLastBlockTime;
}
});
NodeId to_disconnect = youngest_peer.first;
if (youngest_peer.second > next_youngest_peer.second) {
// Our newest block-relay-only peer gave us a block more recently;
// disconnect our second youngest.
to_disconnect = next_youngest_peer.first;
}
m_connman.ForNode(to_disconnect, [&](CNode* pnode) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) {
AssertLockHeld(::cs_main);
// Make sure we're not getting a block right now, and that
// we've been connected long enough for this eviction to happen
// at all.
// Note that we only request blocks from a peer if we learn of a
// valid headers chain with at least as much work as our tip.
CNodeState *node_state = State(pnode->GetId());
if (node_state == nullptr ||
(time_in_seconds - pnode->nTimeConnected >= MINIMUM_CONNECT_TIME && node_state->nBlocksInFlight == 0)) {
pnode->fDisconnect = true;
LogPrint(BCLog::NET, "disconnecting extra block-relay-only peer=%d (last block received at time %d)\n", pnode->GetId(), pnode->nLastBlockTime);
return true;
} else {
LogPrint(BCLog::NET, "keeping block-relay-only peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n",
pnode->GetId(), pnode->nTimeConnected, node_state->nBlocksInFlight);
}
return false;
});
}
// Check whether we have too many OUTBOUND_FULL_RELAY peers
if (m_connman.GetExtraFullOutboundCount() > 0) {
// If we have more OUTBOUND_FULL_RELAY peers than we target, disconnect one.
// Pick the OUTBOUND_FULL_RELAY peer that least recently announced
// us a new block, with ties broken by choosing the more recent
// connection (higher node id)
NodeId worst_peer = -1;
@ -4622,14 +4665,13 @@ void PeerManagerImpl::EvictExtraOutboundPeers(int64_t time_in_seconds)
// Don't disconnect masternodes just because they were slow in block announcement
if (pnode->m_masternode_connection) return;
// Ignore non-outbound peers, or nodes marked for disconnect already
if (!pnode->IsOutboundOrBlockRelayConn() || pnode->fDisconnect) return;
// Only consider OUTBOUND_FULL_RELAY peers that are not already
// marked for disconnection
if (!pnode->IsFullOutboundConn() || pnode->fDisconnect) return;
CNodeState *state = State(pnode->GetId());
if (state == nullptr) return; // shouldn't be possible, but just in case
// Don't evict our protected peers
if (state->m_chain_sync.m_protect) return;
// Don't evict our block-relay-only peers.
if (!pnode->RelayAddrsWithConn()) return;
if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) {
worst_peer = pnode->GetId();
oldest_block_announcement = state->m_last_block_announcement;
@ -4685,6 +4727,11 @@ void PeerManagerImpl::CheckForStaleTipAndEvictPeers()
}
m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL;
}
if (!m_initial_sync_finished && CanDirectFetch(m_chainparams.GetConsensus())) {
m_connman.StartExtraBlockRelayPeers();
m_initial_sync_finished = true;
}
}
namespace {

View File

@ -75,6 +75,11 @@ public:
int64_t nTimeReceived, const std::atomic<bool>& interruptMsgProc) = 0;
virtual bool IsBanned(NodeId pnode) = 0;
/** Whether we've completed initial sync yet, for determining when to turn
* on extra block-relay-only peers. */
bool m_initial_sync_finished{false};
};
#endif // BITCOIN_NET_PROCESSING_H

View File

@ -117,7 +117,7 @@ FUZZ_TARGET_INIT(connman, initialize_connman)
});
}
(void)connman.GetAddedNodeInfo();
(void)connman.GetExtraOutboundCount();
(void)connman.GetExtraFullOutboundCount();
(void)connman.GetLocalServices();
(void)connman.GetMaxOutboundTarget();
(void)connman.GetMaxOutboundTimeframe();