Merge #11560: Connect to a new outbound peer if our tip is stale

6262915 Add unit test for stale tip checking (Suhas Daftuar)
83df257 Add CConnmanTest to mutate g_connman in tests (João Barbosa)
ac7b37c Connect to an extra outbound peer if our tip is stale (Suhas Daftuar)
db32a65 Track tip update time and last new block announcement from each peer (Suhas Daftuar)
2d4327d net: Allow connecting to extra outbound peers (Suhas Daftuar)

Pull request description:

  This is an alternative approach to #11534.  Rather than disconnect an outbound peer when our tip looks stale, instead try to connect to an additional outbound peer.

  Periodically, check to see if we have more outbound peers than we target (ie if any extra peers are in use), and if so, disconnect the one that least recently announced a new block (breaking ties by choosing the newest peer that we connected to).

Tree-SHA512: 8f19e910e0bb36867f81783e020af225f356451899adfc7ade1895d6d3bd5afe51c83759610dfd10c62090c4fe404efa0283b2f63fde0bd7da898a1aaa7fb281
This commit is contained in:
Wladimir J. van der Laan 2017-11-02 20:13:17 +01:00
commit 2f959a5874
No known key found for this signature in database
GPG Key ID: 1E4AED62986CD25D
8 changed files with 313 additions and 8 deletions

View File

@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler)
g_connman = std::unique_ptr<CConnman>(new CConnman(GetRand(std::numeric_limits<uint64_t>::max()), GetRand(std::numeric_limits<uint64_t>::max())));
CConnman& connman = *g_connman;
peerLogic.reset(new PeerLogicValidation(&connman));
peerLogic.reset(new PeerLogicValidation(&connman, scheduler));
RegisterValidationInterface(peerLogic.get());
// sanitize comments per BIP-0014, format user agent and check total size

View File

@ -1693,6 +1693,37 @@ void CConnman::ProcessOneShot()
}
}
bool CConnman::GetTryNewOutboundPeer()
{
return m_try_another_outbound_peer;
}
void CConnman::SetTryNewOutboundPeer(bool flag)
{
m_try_another_outbound_peer = flag;
LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false");
}
// Return the number of peers we have over our outbound connection limit
// Exclude peers that are marked for disconnect, or are going to be
// disconnected soon (eg one-shots and feelers)
// Also exclude peers that haven't finished initial connection handshake yet
// (so that we don't decide we're over our desired connection limit, and then
// evict some peer that has finished the handshake)
int CConnman::GetExtraOutboundCount()
{
int nOutbound = 0;
{
LOCK(cs_vNodes);
for (CNode* pnode : vNodes) {
if (!pnode->fInbound && !pnode->m_manual_connection && !pnode->fFeeler && !pnode->fDisconnect && !pnode->fOneShot && pnode->fSuccessfullyConnected) {
++nOutbound;
}
}
}
return std::max(nOutbound - nMaxOutbound, 0);
}
void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
{
// Connect to specific addresses
@ -1781,7 +1812,8 @@ void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
// * Only make a feeler connection once every few minutes.
//
bool fFeeler = false;
if (nOutbound >= nMaxOutbound) {
if (nOutbound >= nMaxOutbound && !GetTryNewOutboundPeer()) {
int64_t nTime = GetTimeMicros(); // The current time right now (in microseconds).
if (nTime > nNextFeeler) {
nNextFeeler = PoissonNextSend(nTime, FEELER_INTERVAL);
@ -2204,6 +2236,7 @@ CConnman::CConnman(uint64_t nSeed0In, uint64_t nSeed1In) : nSeed0(nSeed0In), nSe
semOutbound = nullptr;
semAddnode = nullptr;
flagInterruptMsgProc = false;
SetTryNewOutboundPeer(false);
Options connOptions;
Init(connOptions);

View File

@ -251,6 +251,19 @@ public:
void GetBanned(banmap_t &banmap);
void SetBanned(const banmap_t &banmap);
// This allows temporarily exceeding nMaxOutbound, with the goal of finding
// a peer that is better than all our current peers.
void SetTryNewOutboundPeer(bool flag);
bool GetTryNewOutboundPeer();
// Return the number of outbound peers we have in excess of our target (eg,
// if we previously called SetTryNewOutboundPeer(true), and have since set
// to false, we may have extra peers that we wish to disconnect). This may
// return a value less than (num_outbound_connections - num_outbound_slots)
// in cases where some outbound connections are not yet fully connected, or
// not yet fully disconnected.
int GetExtraOutboundCount();
bool AddNode(const std::string& node);
bool RemoveAddedNode(const std::string& node);
std::vector<AddedNodeInfo> GetAddedNodeInfo();
@ -413,6 +426,13 @@ private:
std::thread threadOpenAddedConnections;
std::thread threadOpenConnections;
std::thread threadMessageHandler;
/** flag for deciding to connect to an extra outbound peer,
* in excess of nMaxOutbound
* This takes the place of a feeler connection */
std::atomic_bool m_try_another_outbound_peer;
friend struct CConnmanTest;
};
extern std::unique_ptr<CConnman> g_connman;
void Discover(boost::thread_group& threadGroup);

View File

@ -23,6 +23,7 @@
#include "primitives/transaction.h"
#include "random.h"
#include "reverse_iterator.h"
#include "scheduler.h"
#include "tinyformat.h"
#include "txmempool.h"
#include "ui_interface.h"
@ -127,6 +128,9 @@ namespace {
/** Number of outbound peers with m_chain_sync.m_protect. */
int g_outbound_peers_with_protect_from_disconnect = 0;
/** When our tip was last updated. */
int64_t g_last_tip_update = 0;
/** Relay map, protected by cs_main. */
typedef std::map<uint256, CTransactionRef> MapRelay;
MapRelay mapRelay;
@ -231,6 +235,9 @@ struct CNodeState {
ChainSyncTimeoutState m_chain_sync;
//! Time of last new block announcement
int64_t m_last_block_announcement;
CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) {
fCurrentlyConnected = false;
nMisbehavior = 0;
@ -254,6 +261,7 @@ struct CNodeState {
fWantsCmpctWitness = false;
fSupportsDesiredCmpctVersion = false;
m_chain_sync = { 0, nullptr, false, false };
m_last_block_announcement = 0;
}
};
@ -427,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) {
}
}
bool TipMayBeStale(const Consensus::Params &consensusParams)
{
AssertLockHeld(cs_main);
if (g_last_tip_update == 0) {
g_last_tip_update = GetTime();
}
return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty();
}
// Requires cs_main
bool CanDirectFetch(const Consensus::Params &consensusParams)
{
@ -533,6 +550,15 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vector<con
} // namespace
// This function is used for testing the stale tip eviction logic, see
// DoS_tests.cpp
void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds)
{
LOCK(cs_main);
CNodeState *state = State(node);
if (state) state->m_last_block_announcement = time_in_seconds;
}
// Returns true for outbound peers, excluding manual connections, feelers, and
// one-shots
bool IsOutboundDisconnectionCandidate(const CNode *node)
@ -764,9 +790,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus:
(GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT);
}
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) {
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) {
// Initialize global variables that cannot be constructed at startup.
recentRejects.reset(new CRollingBloomFilter(120000, 0.000001));
const Consensus::Params& consensusParams = Params().GetConsensus();
// Stale tip checking and peer eviction are on two different timers, but we
// don't want them to get out of sync due to drift in the scheduler, so we
// combine them in one function and schedule at the quicker (peer-eviction)
// timer.
static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer");
scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000);
}
void PeerLogicValidation::BlockConnected(const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindex, const std::vector<CTransactionRef>& vtxConflicted) {
@ -797,6 +831,8 @@ void PeerLogicValidation::BlockConnected(const std::shared_ptr<const CBlock>& pb
}
LogPrint(BCLog::MEMPOOL, "Erased %d orphan tx included or conflicted by block\n", nErased);
}
g_last_tip_update = GetTime();
}
// All of the following cache a recent block, and are protected by cs_most_recent_block
@ -1215,6 +1251,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
return true;
}
bool received_new_header = false;
const CBlockIndex *pindexLast = nullptr;
{
LOCK(cs_main);
@ -1255,6 +1292,12 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
}
hashLastBlock = header.GetHash();
}
// If we don't have the last header, then they'll have given us
// something new (if these headers are valid).
if (mapBlockIndex.find(hashLastBlock) == mapBlockIndex.end()) {
received_new_header = true;
}
}
CValidationState state;
@ -1319,6 +1362,10 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
// because it is set in UpdateBlockAvailability. Some nullptr checks
// are still present, however, as belt-and-suspenders.
if (received_new_header && pindexLast->nChainWork > chainActive.Tip()->nChainWork) {
nodestate->m_last_block_announcement = GetTime();
}
if (nCount == MAX_HEADERS_RESULTS) {
// Headers message had its maximum size; the peer may have more headers.
// TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue
@ -1403,6 +1450,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
// If this is an outbound peer, check to see if we should protect
// it from the bad/lagging chain logic.
if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) {
LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId());
nodestate->m_chain_sync.m_protect = true;
++g_outbound_peers_with_protect_from_disconnect;
}
@ -2219,6 +2267,8 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
CBlockHeaderAndShortTxIDs cmpctblock;
vRecv >> cmpctblock;
bool received_new_header = false;
{
LOCK(cs_main);
@ -2228,6 +2278,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
connman->PushMessage(pfrom, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), uint256()));
return true;
}
if (mapBlockIndex.find(cmpctblock.header.GetHash()) == mapBlockIndex.end()) {
received_new_header = true;
}
}
const CBlockIndex *pindex = nullptr;
@ -2266,6 +2320,14 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
assert(pindex);
UpdateBlockAvailability(pfrom->GetId(), pindex->GetBlockHash());
CNodeState *nodestate = State(pfrom->GetId());
// If this was a new header with more work than our tip, update the
// peer's last block announcement time
if (received_new_header && pindex->nChainWork > chainActive.Tip()->nChainWork) {
nodestate->m_last_block_announcement = GetTime();
}
std::map<uint256, std::pair<NodeId, std::list<QueuedBlock>::iterator> >::iterator blockInFlightIt = mapBlocksInFlight.find(pindex->GetBlockHash());
bool fAlreadyInFlight = blockInFlightIt != mapBlocksInFlight.end();
@ -2288,8 +2350,6 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
if (!fAlreadyInFlight && !CanDirectFetch(chainparams.GetConsensus()))
return true;
CNodeState *nodestate = State(pfrom->GetId());
if (IsWitnessEnabled(pindex->pprev, chainparams.GetConsensus()) && !nodestate->fSupportsDesiredCmpctVersion) {
// Don't bother trying to process compact blocks from v1 peers
// after segwit activates.
@ -2967,6 +3027,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds)
}
}
void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds)
{
// Check whether we have too many outbound peers
int extra_peers = connman->GetExtraOutboundCount();
if (extra_peers > 0) {
// If we have more outbound peers than we target, disconnect one.
// Pick the outbound peer that least recently announced
// us a new block, with ties broken by choosing the more recent
// connection (higher node id)
NodeId worst_peer = -1;
int64_t oldest_block_announcement = std::numeric_limits<int64_t>::max();
LOCK(cs_main);
connman->ForEachNode([&](CNode* pnode) {
// Ignore non-outbound peers, or nodes marked for disconnect already
if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return;
CNodeState *state = State(pnode->GetId());
if (state == nullptr) return; // shouldn't be possible, but just in case
// Don't evict our protected peers
if (state->m_chain_sync.m_protect) return;
if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) {
worst_peer = pnode->GetId();
oldest_block_announcement = state->m_last_block_announcement;
}
});
if (worst_peer != -1) {
bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) {
// Only disconnect a peer that has been connected to us for
// some reasonable fraction of our check-frequency, to give
// it time for new information to have arrived.
// Also don't disconnect any peer we're trying to download a
// block from.
CNodeState &state = *State(pnode->GetId());
if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) {
LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement);
pnode->fDisconnect = true;
return true;
} else {
LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight);
return false;
}
});
if (disconnected) {
// If we disconnected an extra peer, that means we successfully
// connected to at least one peer after the last time we
// detected a stale tip. Don't try any more extra peers until
// we next detect a stale tip, to limit the load we put on the
// network from these extra connections.
connman->SetTryNewOutboundPeer(false);
}
}
}
}
void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams)
{
if (connman == nullptr) return;
int64_t time_in_seconds = GetTime();
EvictExtraOutboundPeers(time_in_seconds);
if (time_in_seconds > m_stale_tip_check_time) {
LOCK(cs_main);
// Check whether our tip is stale, and if so, allow using an extra
// outbound peer
if (TipMayBeStale(consensusParams)) {
LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update);
connman->SetTryNewOutboundPeer(true);
} else if (connman->GetTryNewOutboundPeer()) {
connman->SetTryNewOutboundPeer(false);
}
m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL;
}
}
class CompareInvMempoolOrder
{
CTxMemPool *mp;

View File

@ -8,6 +8,7 @@
#include "net.h"
#include "validationinterface.h"
#include "consensus/params.h"
/** Default for -maxorphantx, maximum number of orphan transactions kept in memory */
static const unsigned int DEFAULT_MAX_ORPHAN_TRANSACTIONS = 100;
@ -27,13 +28,19 @@ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/head
static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4;
/** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */
static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes
/** How frequently to check for stale tips, in seconds */
static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; // 10 minutes
/** How frequently to check for extra outbound peers and disconnect, in seconds */
static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45;
/** Minimum time an outbound-peer-eviction candidate must be connected for, in order to evict, in seconds */
static constexpr int64_t MINIMUM_CONNECT_TIME = 30;
class PeerLogicValidation : public CValidationInterface, public NetEventsInterface {
private:
CConnman* connman;
CConnman* const connman;
public:
explicit PeerLogicValidation(CConnman* connman);
explicit PeerLogicValidation(CConnman* connman, CScheduler &scheduler);
void BlockConnected(const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindexConnected, const std::vector<CTransactionRef>& vtxConflicted) override;
void UpdatedBlockTip(const CBlockIndex *pindexNew, const CBlockIndex *pindexFork, bool fInitialDownload) override;
@ -55,6 +62,11 @@ public:
bool SendMessages(CNode* pto, std::atomic<bool>& interrupt) override;
void ConsiderEviction(CNode *pto, int64_t time_in_seconds);
void CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams);
void EvictExtraOutboundPeers(int64_t time_in_seconds);
private:
int64_t m_stale_tip_check_time; //! Next time to check for stale tip
};
struct CNodeStateStats {

View File

@ -40,6 +40,8 @@ CService ip(uint32_t i)
static NodeId id = 0;
void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds);
BOOST_FIXTURE_TEST_SUITE(DoS_tests, TestingSetup)
// Test eviction of an outbound peer whose chain never advances
@ -87,6 +89,89 @@ BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction)
peerLogic->FinalizeNode(dummyNode1.GetId(), dummy);
}
void AddRandomOutboundPeer(std::vector<CNode *> &vNodes, PeerLogicValidation &peerLogic)
{
CAddress addr(ip(GetRandInt(0xffffffff)), NODE_NONE);
vNodes.emplace_back(new CNode(id++, ServiceFlags(NODE_NETWORK|NODE_WITNESS), 0, INVALID_SOCKET, addr, 0, 0, CAddress(), "", /*fInboundIn=*/ false));
CNode &node = *vNodes.back();
node.SetSendVersion(PROTOCOL_VERSION);
peerLogic.InitializeNode(&node);
node.nVersion = 1;
node.fSuccessfullyConnected = true;
CConnmanTest::AddNode(node);
}
BOOST_AUTO_TEST_CASE(stale_tip_peer_management)
{
const Consensus::Params& consensusParams = Params().GetConsensus();
constexpr int nMaxOutbound = 8;
CConnman::Options options;
options.nMaxConnections = 125;
options.nMaxOutbound = nMaxOutbound;
options.nMaxFeeler = 1;
connman->Init(options);
std::vector<CNode *> vNodes;
// Mock some outbound peers
for (int i=0; i<nMaxOutbound; ++i) {
AddRandomOutboundPeer(vNodes, *peerLogic);
}
peerLogic->CheckForStaleTipAndEvictPeers(consensusParams);
// No nodes should be marked for disconnection while we have no extra peers
for (const CNode *node : vNodes) {
BOOST_CHECK(node->fDisconnect == false);
}
SetMockTime(GetTime() + 3*consensusParams.nPowTargetSpacing + 1);
// Now tip should definitely be stale, and we should look for an extra
// outbound peer
peerLogic->CheckForStaleTipAndEvictPeers(consensusParams);
BOOST_CHECK(connman->GetTryNewOutboundPeer());
// Still no peers should be marked for disconnection
for (const CNode *node : vNodes) {
BOOST_CHECK(node->fDisconnect == false);
}
// If we add one more peer, something should get marked for eviction
// on the next check (since we're mocking the time to be in the future, the
// required time connected check should be satisfied).
AddRandomOutboundPeer(vNodes, *peerLogic);
peerLogic->CheckForStaleTipAndEvictPeers(consensusParams);
for (int i=0; i<nMaxOutbound; ++i) {
BOOST_CHECK(vNodes[i]->fDisconnect == false);
}
// Last added node should get marked for eviction
BOOST_CHECK(vNodes.back()->fDisconnect == true);
vNodes.back()->fDisconnect = false;
// Update the last announced block time for the last
// peer, and check that the next newest node gets evicted.
UpdateLastBlockAnnounceTime(vNodes.back()->GetId(), GetTime());
peerLogic->CheckForStaleTipAndEvictPeers(consensusParams);
for (int i=0; i<nMaxOutbound-1; ++i) {
BOOST_CHECK(vNodes[i]->fDisconnect == false);
}
BOOST_CHECK(vNodes[nMaxOutbound-1]->fDisconnect == true);
BOOST_CHECK(vNodes.back()->fDisconnect == false);
bool dummy;
for (const CNode *node : vNodes) {
peerLogic->FinalizeNode(node->GetId(), dummy);
}
CConnmanTest::ClearNodes();
}
BOOST_AUTO_TEST_CASE(DoS_banning)
{
std::atomic<bool> interruptDummy(false);

View File

@ -25,6 +25,18 @@
#include <memory>
void CConnmanTest::AddNode(CNode& node)
{
LOCK(g_connman->cs_vNodes);
g_connman->vNodes.push_back(&node);
}
void CConnmanTest::ClearNodes()
{
LOCK(g_connman->cs_vNodes);
g_connman->vNodes.clear();
}
uint256 insecure_rand_seed = GetRandHash();
FastRandomContext insecure_rand_ctx(insecure_rand_seed);
@ -86,7 +98,7 @@ TestingSetup::TestingSetup(const std::string& chainName) : BasicTestingSetup(cha
threadGroup.create_thread(&ThreadScriptCheck);
g_connman = std::unique_ptr<CConnman>(new CConnman(0x1337, 0x1337)); // Deterministic randomness for tests.
connman = g_connman.get();
peerLogic.reset(new PeerLogicValidation(connman));
peerLogic.reset(new PeerLogicValidation(connman, scheduler));
}
TestingSetup::~TestingSetup()

View File

@ -49,6 +49,12 @@ struct BasicTestingSetup {
* Included are data directory, coins database, script check threads setup.
*/
class CConnman;
class CNode;
struct CConnmanTest {
static void AddNode(CNode& node);
static void ClearNodes();
};
class PeerLogicValidation;
struct TestingSetup: public BasicTestingSetup {
CCoinsViewDB *pcoinsdbview;