Recovery from MASTERNODE_NEW_START_REQUIRED state (#1232)

Implement masternode recovery from MASTERNODE_NEW_START_REQUIRED state via mn quorums
This commit is contained in:
UdjinM6 2017-01-01 21:48:53 +04:00 committed by GitHub
parent f6ca563ab2
commit e4a290693b
7 changed files with 128 additions and 21 deletions

View File

@ -2478,16 +2478,17 @@ void ThreadCheckDarkSendPool()
nTick++; nTick++;
// make sure to check all masternodes first
mnodeman.Check();
// check if we should activate or ping every few minutes, // check if we should activate or ping every few minutes,
// slightly postpone first run to give net thread a chance to connect to some peers // slightly postpone first run to give net thread a chance to connect to some peers
if(nTick % MASTERNODE_MIN_MNP_SECONDS == 15) if(nTick % MASTERNODE_MIN_MNP_SECONDS == 15)
activeMasternode.ManageState(); activeMasternode.ManageState();
mnodeman.Check();
if(nTick % 60 == 0) { if(nTick % 60 == 0) {
mnodeman.CheckAndRemove();
mnodeman.ProcessMasternodeConnections(); mnodeman.ProcessMasternodeConnections();
mnodeman.CheckAndRemove();
mnpayments.CheckAndRemove(); mnpayments.CheckAndRemove();
CleanTxLockCandidates(); CleanTxLockCandidates();
} }

View File

@ -4944,7 +4944,7 @@ bool static AlreadyHave(const CInv& inv) EXCLUSIVE_LOCKS_REQUIRED(cs_main)
} }
case MSG_MASTERNODE_ANNOUNCE: case MSG_MASTERNODE_ANNOUNCE:
return mnodeman.mapSeenMasternodeBroadcast.count(inv.hash); return mnodeman.mapSeenMasternodeBroadcast.count(inv.hash) && !mnodeman.IsMnbRecoveryRequested(inv.hash);
case MSG_MASTERNODE_PING: case MSG_MASTERNODE_PING:
return mnodeman.mapSeenMasternodePing.count(inv.hash); return mnodeman.mapSeenMasternodePing.count(inv.hash);

View File

@ -109,7 +109,7 @@ CMasternode::CMasternode(const CMasternodeBroadcast& mnb) :
// //
bool CMasternode::UpdateFromNewBroadcast(CMasternodeBroadcast& mnb) bool CMasternode::UpdateFromNewBroadcast(CMasternodeBroadcast& mnb)
{ {
if(mnb.sigTime <= sigTime) return false; if(mnb.sigTime <= sigTime && !mnb.fRecovery) return false;
pubKeyMasternode = mnb.pubKeyMasternode; pubKeyMasternode = mnb.pubKeyMasternode;
sigTime = mnb.sigTime; sigTime = mnb.sigTime;
@ -119,7 +119,6 @@ bool CMasternode::UpdateFromNewBroadcast(CMasternodeBroadcast& mnb)
nPoSeBanScore = 0; nPoSeBanScore = 0;
nPoSeBanHeight = 0; nPoSeBanHeight = 0;
nTimeLastChecked = 0; nTimeLastChecked = 0;
nTimeLastWatchdogVote = mnb.sigTime;
int nDos = 0; int nDos = 0;
if(mnb.lastPing == CMasternodePing() || (mnb.lastPing != CMasternodePing() && mnb.lastPing.CheckAndUpdate(this, true, nDos))) { if(mnb.lastPing == CMasternodePing() || (mnb.lastPing != CMasternodePing() && mnb.lastPing.CheckAndUpdate(this, true, nDos))) {
lastPing = mnb.lastPing; lastPing = mnb.lastPing;
@ -569,7 +568,7 @@ bool CMasternodeBroadcast::Update(CMasternode* pmn, int& nDos)
{ {
nDos = 0; nDos = 0;
if(pmn->sigTime == sigTime) { if(pmn->sigTime == sigTime && !fRecovery) {
// mapSeenMasternodeBroadcast in CMasternodeMan::CheckMnbAndUpdateMasternodeList should filter legit duplicates // mapSeenMasternodeBroadcast in CMasternodeMan::CheckMnbAndUpdateMasternodeList should filter legit duplicates
// but this still can happen if we just started, which is ok, just do nothing here. // but this still can happen if we just started, which is ok, just do nothing here.
return false; return false;

View File

@ -340,10 +340,12 @@ class CMasternodeBroadcast : public CMasternode
{ {
public: public:
CMasternodeBroadcast() : CMasternode() {} bool fRecovery;
CMasternodeBroadcast(const CMasternode& mn) : CMasternode(mn) {}
CMasternodeBroadcast() : CMasternode(), fRecovery(false) {}
CMasternodeBroadcast(const CMasternode& mn) : CMasternode(mn), fRecovery(false) {}
CMasternodeBroadcast(CService addrNew, CTxIn vinNew, CPubKey pubKeyCollateralAddressNew, CPubKey pubKeyMasternodeNew, int nProtocolVersionIn) : CMasternodeBroadcast(CService addrNew, CTxIn vinNew, CPubKey pubKeyCollateralAddressNew, CPubKey pubKeyMasternodeNew, int nProtocolVersionIn) :
CMasternode(addrNew, vinNew, pubKeyCollateralAddressNew, pubKeyMasternodeNew, nProtocolVersionIn) {} CMasternode(addrNew, vinNew, pubKeyCollateralAddressNew, pubKeyMasternodeNew, nProtocolVersionIn), fRecovery(false) {}
ADD_SERIALIZE_METHODS; ADD_SERIALIZE_METHODS;

View File

@ -15,7 +15,7 @@
/** Masternode manager */ /** Masternode manager */
CMasternodeMan mnodeman; CMasternodeMan mnodeman;
const std::string CMasternodeMan::SERIALIZATION_VERSION_STRING = "CMasternodeMan-Version-3"; const std::string CMasternodeMan::SERIALIZATION_VERSION_STRING = "CMasternodeMan-Version-4";
struct CompareLastPaidBlock struct CompareLastPaidBlock
{ {
@ -122,7 +122,6 @@ bool CMasternodeMan::Add(CMasternode &mn)
CMasternode *pmn = Find(mn.vin); CMasternode *pmn = Find(mn.vin);
if (pmn == NULL) { if (pmn == NULL) {
LogPrint("masternode", "CMasternodeMan::Add -- Adding new Masternode: addr=%s, %i now\n", mn.addr.ToString(), size() + 1); LogPrint("masternode", "CMasternodeMan::Add -- Adding new Masternode: addr=%s, %i now\n", mn.addr.ToString(), size() + 1);
mn.nTimeLastWatchdogVote = mn.sigTime;
vMasternodes.push_back(mn); vMasternodes.push_back(mn);
indexMasternodes.AddMasternodeVIN(mn.vin); indexMasternodes.AddMasternodeVIN(mn.vin);
fMasternodesAdded = true; fMasternodesAdded = true;
@ -176,30 +175,103 @@ void CMasternodeMan::CheckAndRemove()
{ {
LogPrintf("CMasternodeMan::CheckAndRemove\n"); LogPrintf("CMasternodeMan::CheckAndRemove\n");
Check();
{ {
LOCK(cs); // Need LOCK2 here to ensure consistent locking order because code below locks cs_main
// through GetHeight() signal in ConnectNode and in CheckMnbAndUpdateMasternodeList()
LOCK2(cs_main, cs);
// Remove inactive and outdated masternodes Check();
// Remove spent masternodes, prepare structures and make requests to reassure the state of inactive ones
std::vector<CMasternode>::iterator it = vMasternodes.begin(); std::vector<CMasternode>::iterator it = vMasternodes.begin();
std::vector<std::pair<int, CMasternode> > vecMasternodeRanks;
bool fAskedForMnbRecovery = false; // ask for one mn at a time
while(it != vMasternodes.end()) { while(it != vMasternodes.end()) {
CMasternodeBroadcast mnb = CMasternodeBroadcast(*it);
uint256 hash = mnb.GetHash();
// If collateral was spent ... // If collateral was spent ...
if ((*it).IsOutpointSpent()) { if ((*it).IsOutpointSpent()) {
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- Removing Masternode: %s addr=%s %i now\n", (*it).GetStateString(), (*it).addr.ToString(), size() - 1); LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- Removing Masternode: %s addr=%s %i now\n", (*it).GetStateString(), (*it).addr.ToString(), size() - 1);
// erase all of the broadcasts we've seen from this txin, ... // erase all of the broadcasts we've seen from this txin, ...
mapSeenMasternodeBroadcast.erase(CMasternodeBroadcast(*it).GetHash()); mapSeenMasternodeBroadcast.erase(hash);
mWeAskedForMasternodeListEntry.erase((*it).vin.prevout); mWeAskedForMasternodeListEntry.erase((*it).vin.prevout);
// and finally remove it from the list // and finally remove it from the list
it = vMasternodes.erase(it); it = vMasternodes.erase(it);
fMasternodesRemoved = true; fMasternodesRemoved = true;
} else { } else {
if(pCurrentBlockIndex && !fAskedForMnbRecovery && it->IsNewStartRequired() && !IsMnbRecoveryRequested(hash)) {
// this mn is in a non-recoverable state and we haven't asked other nodes yet
std::set<CNetAddr> setRequested;
// calculate only once and only when it's needed
if(vecMasternodeRanks.empty()) {
int nRandomBlockHeight = GetRandInt(pCurrentBlockIndex->nHeight);
vecMasternodeRanks = GetMasternodeRanks(nRandomBlockHeight);
}
// ask first MNB_RECOVERY_QUORUM_TOTAL mns we can connect to and we haven't asked recently
for(int i = 0; setRequested.size() < MNB_RECOVERY_QUORUM_TOTAL && i < (int)vecMasternodeRanks.size(); i++) {
// avoid banning
if(mWeAskedForMasternodeListEntry.count(it->vin.prevout) && mWeAskedForMasternodeListEntry[it->vin.prevout].count(vecMasternodeRanks[i].second.addr)) continue;
// didn't ask recently, ok to ask now
CService addr = vecMasternodeRanks[i].second.addr;
CNode* pnode = ConnectNode(CAddress(addr), NULL, true);
if(pnode) {
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- asking for mnb of %s, addr=%s\n", it->vin.prevout.ToStringShort(), addr.ToString());
setRequested.insert(addr);
// can't use AskForMN here, inv system is way too smart, request data directly instead
std::vector<CInv> vToFetch;
vToFetch.push_back(CInv(MSG_MASTERNODE_ANNOUNCE, hash));
pnode->PushMessage(NetMsgType::GETDATA, vToFetch);
fAskedForMnbRecovery = true;
} else {
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- can't connect to node to ask for mnb, addr=%s\n", addr.ToString());
}
}
// wait for mnb recovery replies for MNB_RECOVERY_WAIT_SECONDS seconds
mMnbRecoveryRequests[hash] = std::make_pair(GetTime() + MNB_RECOVERY_WAIT_SECONDS, setRequested);
}
++it; ++it;
} }
} }
// process replies for MASTERNODE_NEW_START_REQUIRED masternodes
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- mMnbRecoveryGoodReplies size=%d\n", (int)mMnbRecoveryGoodReplies.size());
std::map<uint256, std::vector<CMasternodeBroadcast> >::iterator itMnbReplies = mMnbRecoveryGoodReplies.begin();
while(itMnbReplies != mMnbRecoveryGoodReplies.end()){
if(mMnbRecoveryRequests[itMnbReplies->first].first < GetTime()) {
// all nodes we asked should have replied now
if(itMnbReplies->second.size() >= MNB_RECOVERY_QUORUM_REQUIRED) {
// majority of nodes we asked agrees that this mn doesn't require new mnb, reprocess one of new mnbs
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- reprocessing mnb, masternode=%s\n", itMnbReplies->second[0].vin.prevout.ToStringShort());
// mapSeenMasternodeBroadcast.erase(itMnbReplies->first);
int nDos;
itMnbReplies->second[0].fRecovery = true;
CheckMnbAndUpdateMasternodeList(NULL, itMnbReplies->second[0], nDos);
}
LogPrint("masternode", "CMasternodeMan::CheckAndRemove -- removing mnb recovery reply, masternode=%s, size=%d\n", itMnbReplies->second[0].vin.prevout.ToStringShort(), (int)itMnbReplies->second.size());
mMnbRecoveryGoodReplies.erase(itMnbReplies++);
} else {
++itMnbReplies;
}
}
}
{
// no need for cs_main below
LOCK(cs);
std::map<uint256, std::pair< int64_t, std::set<CNetAddr> > >::iterator itMnbRequest = mMnbRecoveryRequests.begin();
while(itMnbRequest != mMnbRecoveryRequests.end()){
// Allow this mnb to be re-verified again after MNB_RECOVERY_RETRY_SECONDS seconds
// if mn is still in MASTERNODE_NEW_START_REQUIRED state.
if(GetTime() - itMnbRequest->second.first > MNB_RECOVERY_RETRY_SECONDS) {
mMnbRecoveryRequests.erase(itMnbRequest++);
} else {
++itMnbRequest;
}
}
// check who's asked for the Masternode list // check who's asked for the Masternode list
std::map<CNetAddr, int64_t>::iterator it1 = mAskedUsForMasternodeList.begin(); std::map<CNetAddr, int64_t>::iterator it1 = mAskedUsForMasternodeList.begin();
while(it1 != mAskedUsForMasternodeList.end()){ while(it1 != mAskedUsForMasternodeList.end()){
@ -712,7 +784,7 @@ void CMasternodeMan::ProcessMessage(CNode* pfrom, std::string& strCommand, CData
int nDos = 0; int nDos = 0;
if (CheckMnbAndUpdateMasternodeList(mnb, nDos)) { if (CheckMnbAndUpdateMasternodeList(pfrom, mnb, nDos)) {
// use announced Masternode as a peer // use announced Masternode as a peer
addrman.Add(CAddress(mnb.addr), pfrom->addr, 2*60*60); addrman.Add(CAddress(mnb.addr), pfrom->addr, 2*60*60);
} else if(nDos > 0) { } else if(nDos > 0) {
@ -1300,7 +1372,7 @@ void CMasternodeMan::UpdateMasternodeList(CMasternodeBroadcast mnb)
} }
} }
bool CMasternodeMan::CheckMnbAndUpdateMasternodeList(CMasternodeBroadcast mnb, int& nDos) bool CMasternodeMan::CheckMnbAndUpdateMasternodeList(CNode* pfrom, CMasternodeBroadcast mnb, int& nDos)
{ {
// Need LOCK2 here to ensure consistent locking order because the SimpleCheck call below locks cs_main // Need LOCK2 here to ensure consistent locking order because the SimpleCheck call below locks cs_main
LOCK2(cs_main, cs); LOCK2(cs_main, cs);
@ -1309,7 +1381,7 @@ bool CMasternodeMan::CheckMnbAndUpdateMasternodeList(CMasternodeBroadcast mnb, i
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- masternode=%s\n", mnb.vin.prevout.ToStringShort()); LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- masternode=%s\n", mnb.vin.prevout.ToStringShort());
uint256 hash = mnb.GetHash(); uint256 hash = mnb.GetHash();
if(mapSeenMasternodeBroadcast.count(hash)) { //seen if(mapSeenMasternodeBroadcast.count(hash) && !mnb.fRecovery) { //seen
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- masternode=%s seen\n", mnb.vin.prevout.ToStringShort()); LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- masternode=%s seen\n", mnb.vin.prevout.ToStringShort());
// less than 2 pings left before this MN goes into non-recoverable state, bump sync timeout // less than 2 pings left before this MN goes into non-recoverable state, bump sync timeout
if(GetTime() - mapSeenMasternodeBroadcast[hash].first > MASTERNODE_NEW_START_REQUIRED_SECONDS - MASTERNODE_MIN_MNP_SECONDS * 2) { if(GetTime() - mapSeenMasternodeBroadcast[hash].first > MASTERNODE_NEW_START_REQUIRED_SECONDS - MASTERNODE_MIN_MNP_SECONDS * 2) {
@ -1317,6 +1389,27 @@ bool CMasternodeMan::CheckMnbAndUpdateMasternodeList(CMasternodeBroadcast mnb, i
mapSeenMasternodeBroadcast[hash].first = GetTime(); mapSeenMasternodeBroadcast[hash].first = GetTime();
masternodeSync.AddedMasternodeList(); masternodeSync.AddedMasternodeList();
} }
// did we ask this node for it?
if(pfrom && IsMnbRecoveryRequested(hash) && GetTime() < mMnbRecoveryRequests[hash].first) {
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- mnb=%s seen request\n", hash.ToString());
if(mMnbRecoveryRequests[hash].second.count(pfrom->addr)) {
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- mnb=%s seen request, addr=%s\n", hash.ToString(), pfrom->addr.ToString());
// do not allow node to send same mnb multiple times in recovery mode
mMnbRecoveryRequests[hash].second.erase(pfrom->addr);
// does it have newer lastPing?
if(mnb.lastPing.sigTime > mapSeenMasternodeBroadcast[hash].second.lastPing.sigTime) {
// simulate Check
CMasternode mnTemp = CMasternode(mnb);
mnTemp.Check();
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- mnb=%s seen request, addr=%s, better lastPing: %d min ago, projected mn state: %s\n", hash.ToString(), pfrom->addr.ToString(), (GetTime() - mnb.lastPing.sigTime)/60, mnTemp.GetStateString());
if(mnTemp.IsValidStateForAutoStart(mnTemp.nActiveState)) {
// this node thinks it's a good one
LogPrint("masternode", "CMasternodeMan::CheckMnbAndUpdateMasternodeList -- masternode=%s seen good\n", mnb.vin.prevout.ToStringShort());
mMnbRecoveryGoodReplies[hash].push_back(mnb);
}
}
}
}
return true; return true;
} }
mapSeenMasternodeBroadcast.insert(std::make_pair(hash, std::make_pair(GetTime(), mnb))); mapSeenMasternodeBroadcast.insert(std::make_pair(hash, std::make_pair(GetTime(), mnb)));

View File

@ -103,6 +103,11 @@ private:
static const int MAX_POSE_RANK = 10; static const int MAX_POSE_RANK = 10;
static const int MAX_POSE_BLOCKS = 10; static const int MAX_POSE_BLOCKS = 10;
static const int MNB_RECOVERY_QUORUM_TOTAL = 10;
static const int MNB_RECOVERY_QUORUM_REQUIRED = 6;
static const int MNB_RECOVERY_WAIT_SECONDS = 60;
static const int MNB_RECOVERY_RETRY_SECONDS = 3 * 60 * 60;
// critical section to protect the inner data structures // critical section to protect the inner data structures
mutable CCriticalSection cs; mutable CCriticalSection cs;
@ -121,6 +126,10 @@ private:
// who we asked for the masternode verification // who we asked for the masternode verification
std::map<CNetAddr, CMasternodeVerification> mWeAskedForVerification; std::map<CNetAddr, CMasternodeVerification> mWeAskedForVerification;
// these maps are used for masternode recovery from MASTERNODE_NEW_START_REQUIRED state
std::map<uint256, std::pair< int64_t, std::set<CNetAddr> > > mMnbRecoveryRequests;
std::map<uint256, std::vector<CMasternodeBroadcast> > mMnbRecoveryGoodReplies;
int64_t nLastIndexRebuildTime; int64_t nLastIndexRebuildTime;
CMasternodeIndex indexMasternodes; CMasternodeIndex indexMasternodes;
@ -171,6 +180,8 @@ public:
READWRITE(mAskedUsForMasternodeList); READWRITE(mAskedUsForMasternodeList);
READWRITE(mWeAskedForMasternodeList); READWRITE(mWeAskedForMasternodeList);
READWRITE(mWeAskedForMasternodeListEntry); READWRITE(mWeAskedForMasternodeListEntry);
READWRITE(mMnbRecoveryRequests);
READWRITE(mMnbRecoveryGoodReplies);
READWRITE(nLastWatchdogVoteTime); READWRITE(nLastWatchdogVoteTime);
READWRITE(nDsqCount); READWRITE(nDsqCount);
@ -304,7 +315,8 @@ public:
/// Update masternode list and maps using provided CMasternodeBroadcast /// Update masternode list and maps using provided CMasternodeBroadcast
void UpdateMasternodeList(CMasternodeBroadcast mnb); void UpdateMasternodeList(CMasternodeBroadcast mnb);
/// Perform complete check and only then update list and maps /// Perform complete check and only then update list and maps
bool CheckMnbAndUpdateMasternodeList(CMasternodeBroadcast mnb, int& nDos); bool CheckMnbAndUpdateMasternodeList(CNode* pfrom, CMasternodeBroadcast mnb, int& nDos);
/// Tell whether mMnbRecoveryRequests currently holds an entry keyed by the given mnb hash
bool IsMnbRecoveryRequested(const uint256& hash)
{
    return mMnbRecoveryRequests.find(hash) != mMnbRecoveryRequests.end();
}
void UpdateLastPaid(); void UpdateLastPaid();

View File

@ -765,7 +765,7 @@ UniValue masternodebroadcast(const UniValue& params, bool fHelp)
bool fResult; bool fResult;
if (mnb.CheckSignature(nDos)) { if (mnb.CheckSignature(nDos)) {
if (fSafe) { if (fSafe) {
fResult = mnodeman.CheckMnbAndUpdateMasternodeList(mnb, nDos); fResult = mnodeman.CheckMnbAndUpdateMasternodeList(NULL, mnb, nDos);
} else { } else {
mnodeman.UpdateMasternodeList(mnb); mnodeman.UpdateMasternodeList(mnb);
mnb.Relay(); mnb.Relay();