Merge pull request #6064
f46a680 Better mruset unit test (Pieter Wuille)
d4d5022 Use ring buffer of set iterators instead of deque of copies in mruset (Pieter Wuille)
d81cff3 Replace mruset setAddrKnown with CRollingBloomFilter addrKnown (Gavin Andresen)
69a5f8b Rolling bloom filter class (Gavin Andresen)
This commit is contained in:
commit
b46e7c24e5
@ -40,6 +40,17 @@ nFlags(nFlagsIn)
|
||||
{
|
||||
}
|
||||
|
||||
// Private constructor used by CRollingBloomFilter
|
||||
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
|
||||
vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
|
||||
isFull(false),
|
||||
isEmpty(true),
|
||||
nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
|
||||
nTweak(nTweakIn),
|
||||
nFlags(BLOOM_UPDATE_NONE)
|
||||
{
|
||||
}
|
||||
|
||||
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
|
||||
{
|
||||
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
|
||||
@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
|
||||
isFull = full;
|
||||
isEmpty = empty;
|
||||
}
|
||||
|
||||
/**
 * Set up a rolling filter built from two bloom filters of 2 * nElements each.
 *
 * @param nElements number of most recent insertions that must stay remembered
 * @param fpRate    false-positive rate handed to both underlying filters
 * @param nTweak    tweak handed to both underlying filters
 */
CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak)
    : nBloomSize(nElements * 2),
      nInsertions(0),
      b1(nElements * 2, fpRate, nTweak),
      b2(nElements * 2, fpRate, nTweak)
{
    // The two filters are filled in parallel and cleared in a staggered
    // fashion, one of them every nElements insertions, so at least one of
    // them always holds the last nElements inserted keys.
}
|
||||
|
||||
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
|
||||
{
|
||||
if (nInsertions == 0) {
|
||||
b1.clear();
|
||||
} else if (nInsertions == nBloomSize / 2) {
|
||||
b2.clear();
|
||||
}
|
||||
b1.insert(vKey);
|
||||
b2.insert(vKey);
|
||||
if (++nInsertions == nBloomSize) {
|
||||
nInsertions = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
|
||||
{
|
||||
if (nInsertions < nBloomSize / 2) {
|
||||
return b2.contains(vKey);
|
||||
}
|
||||
return b1.contains(vKey);
|
||||
}
|
||||
|
||||
void CRollingBloomFilter::clear()
|
||||
{
|
||||
b1.clear();
|
||||
b2.clear();
|
||||
nInsertions = 0;
|
||||
}
|
||||
|
28
src/bloom.h
28
src/bloom.h
@ -53,6 +53,10 @@ private:
|
||||
|
||||
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
|
||||
|
||||
// Private constructor for CRollingBloomFilter, no restrictions on size
|
||||
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
|
||||
friend class CRollingBloomFilter;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
|
||||
@ -97,4 +101,28 @@ public:
|
||||
void UpdateEmptyFull();
|
||||
};
|
||||
|
||||
/**
|
||||
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
|
||||
* Construct it with the number of items to keep track of, and a false-positive rate.
|
||||
*
|
||||
* contains(item) will always return true if item was one of the last N things
|
||||
* insert()'ed ... but may also return true for items that were not inserted.
|
||||
*/
|
||||
class CRollingBloomFilter
|
||||
{
|
||||
public:
|
||||
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
|
||||
|
||||
void insert(const std::vector<unsigned char>& vKey);
|
||||
bool contains(const std::vector<unsigned char>& vKey) const;
|
||||
|
||||
void clear();
|
||||
|
||||
private:
|
||||
unsigned int nBloomSize;
|
||||
unsigned int nInsertions;
|
||||
CBloomFilter b1, b2;
|
||||
};
|
||||
|
||||
|
||||
#endif // BITCOIN_BLOOM_H
|
||||
|
10
src/main.cpp
10
src/main.cpp
@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
|
||||
{
|
||||
LOCK(cs_vNodes);
|
||||
// Use deterministic randomness to send to the same nodes for 24 hours
|
||||
// at a time so the setAddrKnowns of the chosen nodes prevent repeats
|
||||
// at a time so the addrKnowns of the chosen nodes prevent repeats
|
||||
static uint256 hashSalt;
|
||||
if (hashSalt.IsNull())
|
||||
hashSalt = GetRandHash();
|
||||
@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
|
||||
LOCK(cs_vNodes);
|
||||
BOOST_FOREACH(CNode* pnode, vNodes)
|
||||
{
|
||||
// Periodically clear setAddrKnown to allow refresh broadcasts
|
||||
// Periodically clear addrKnown to allow refresh broadcasts
|
||||
if (nLastRebroadcast)
|
||||
pnode->setAddrKnown.clear();
|
||||
pnode->addrKnown.clear();
|
||||
|
||||
// Rebroadcast our address
|
||||
AdvertizeLocal(pnode);
|
||||
@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
|
||||
vAddr.reserve(pto->vAddrToSend.size());
|
||||
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
|
||||
{
|
||||
// returns true if wasn't already contained in the set
|
||||
if (pto->setAddrKnown.insert(addr).second)
|
||||
if (!pto->addrKnown.contains(addr.GetKey()))
|
||||
{
|
||||
pto->addrKnown.insert(addr.GetKey());
|
||||
vAddr.push_back(addr);
|
||||
// receiver rejects addr messages larger than 1000
|
||||
if (vAddr.size() >= 1000)
|
||||
|
36
src/mruset.h
36
src/mruset.h
@ -1,12 +1,12 @@
|
||||
// Copyright (c) 2012 The Bitcoin Core developers
|
||||
// Copyright (c) 2012-2015 The Bitcoin Core developers
|
||||
// Distributed under the MIT software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#ifndef BITCOIN_MRUSET_H
|
||||
#define BITCOIN_MRUSET_H
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
/** STL-like set container that only keeps the most recent N elements. */
|
||||
@ -22,11 +22,13 @@ public:
|
||||
|
||||
protected:
|
||||
std::set<T> set;
|
||||
std::deque<T> queue;
|
||||
size_type nMaxSize;
|
||||
std::vector<iterator> order;
|
||||
size_type first_used;
|
||||
size_type first_unused;
|
||||
const size_type nMaxSize;
|
||||
|
||||
public:
|
||||
mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
|
||||
mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
|
||||
iterator begin() const { return set.begin(); }
|
||||
iterator end() const { return set.end(); }
|
||||
size_type size() const { return set.size(); }
|
||||
@ -36,7 +38,9 @@ public:
|
||||
void clear()
|
||||
{
|
||||
set.clear();
|
||||
queue.clear();
|
||||
order.assign(nMaxSize, set.end());
|
||||
first_used = 0;
|
||||
first_unused = 0;
|
||||
}
|
||||
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
|
||||
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
|
||||
@ -45,25 +49,17 @@ public:
|
||||
{
|
||||
std::pair<iterator, bool> ret = set.insert(x);
|
||||
if (ret.second) {
|
||||
if (nMaxSize && queue.size() == nMaxSize) {
|
||||
set.erase(queue.front());
|
||||
queue.pop_front();
|
||||
if (set.size() == nMaxSize + 1) {
|
||||
set.erase(order[first_used]);
|
||||
order[first_used] = set.end();
|
||||
if (++first_used == nMaxSize) first_used = 0;
|
||||
}
|
||||
queue.push_back(x);
|
||||
order[first_unused] = ret.first;
|
||||
if (++first_unused == nMaxSize) first_unused = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
size_type max_size() const { return nMaxSize; }
|
||||
size_type max_size(size_type s)
|
||||
{
|
||||
if (s)
|
||||
while (queue.size() > s) {
|
||||
set.erase(queue.front());
|
||||
queue.pop_front();
|
||||
}
|
||||
nMaxSize = s;
|
||||
return nMaxSize;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // BITCOIN_MRUSET_H
|
||||
|
@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr)
|
||||
unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000); }
|
||||
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
|
||||
|
||||
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(5000)
|
||||
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
|
||||
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
|
||||
addrKnown(5000, 0.001, insecure_rand()),
|
||||
setInventoryKnown(SendBufferSize() / 1000)
|
||||
{
|
||||
nServices = 0;
|
||||
hSocket = hSocketIn;
|
||||
@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
|
||||
nStartingHeight = -1;
|
||||
fGetAddr = false;
|
||||
fRelayTxes = false;
|
||||
setInventoryKnown.max_size(SendBufferSize() / 1000);
|
||||
pfilter = new CBloomFilter();
|
||||
nPingNonceSent = 0;
|
||||
nPingUsecStart = 0;
|
||||
|
@ -300,7 +300,7 @@ public:
|
||||
|
||||
// flood relay
|
||||
std::vector<CAddress> vAddrToSend;
|
||||
mruset<CAddress> setAddrKnown;
|
||||
CRollingBloomFilter addrKnown;
|
||||
bool fGetAddr;
|
||||
std::set<uint256> setKnown;
|
||||
|
||||
@ -380,7 +380,7 @@ public:
|
||||
|
||||
void AddAddressKnown(const CAddress& addr)
|
||||
{
|
||||
setAddrKnown.insert(addr);
|
||||
addrKnown.insert(addr.GetKey());
|
||||
}
|
||||
|
||||
void PushAddress(const CAddress& addr)
|
||||
@ -388,7 +388,7 @@ public:
|
||||
// Known checking here is only to save space from duplicates.
|
||||
// SendMessages will filter it again for knowns that were added
|
||||
// after addresses were pushed.
|
||||
if (addr.IsValid() && !setAddrKnown.count(addr)) {
|
||||
if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
|
||||
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
|
||||
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
|
||||
} else {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "clientversion.h"
|
||||
#include "key.h"
|
||||
#include "merkleblock.h"
|
||||
#include "random.h"
|
||||
#include "serialize.h"
|
||||
#include "streams.h"
|
||||
#include "uint256.h"
|
||||
@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
|
||||
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
|
||||
}
|
||||
|
||||
static std::vector<unsigned char> RandomData()
|
||||
{
|
||||
uint256 r = GetRandHash();
|
||||
return std::vector<unsigned char>(r.begin(), r.end());
|
||||
}
|
||||
|
||||
/**
 * Exercises CRollingBloomFilter: the last N insertions are always reported,
 * the false-positive count stays in a plausible range, clear() forgets
 * everything, and old entries eventually roll out of the filter.
 */
BOOST_AUTO_TEST_CASE(rolling_bloom)
{
    // Window size of the first filter.
    static const int NELEMENTS = 100;

    // last-100-entry, 1% false positive:
    CRollingBloomFilter rb1(NELEMENTS, 0.01, 0);

    // Overfill:
    static const int DATASIZE = 399;
    std::vector<unsigned char> data[DATASIZE];
    for (int i = 0; i < DATASIZE; i++) {
        data[i] = RandomData();
        rb1.insert(data[i]);
    }
    // Last 100 guaranteed to be remembered:
    for (int i = DATASIZE - NELEMENTS; i < DATASIZE; i++) {
        BOOST_CHECK(rb1.contains(data[i]));
    }

    // false positive rate is 1%, so we should get about 100 hits if
    // testing 10,000 random keys. We get worst-case false positive
    // behavior when the filter is as full as possible, which is
    // when we've inserted one less than an integer multiple of
    // nElements*2 (here: 399 = 2 * 200 - 1).
    unsigned int nHits = 0;
    for (int i = 0; i < 10000; i++) {
        if (rb1.contains(RandomData()))
            ++nHits;
    }
    // Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
    BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");

    // Insanely unlikely to get a fp count outside this range:
    BOOST_CHECK(nHits > 25);
    BOOST_CHECK(nHits < 175);

    BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
    rb1.clear();
    BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));

    // Now roll through data, make sure last 100 entries
    // are always remembered:
    for (int i = 0; i < DATASIZE; i++) {
        if (i >= NELEMENTS)
            BOOST_CHECK(rb1.contains(data[i-NELEMENTS]));
        rb1.insert(data[i]);
    }

    // Insert 999 more random entries:
    for (int i = 0; i < 999; i++) {
        rb1.insert(RandomData());
    }
    // Sanity check to make sure the filter isn't just filling up:
    nHits = 0;
    for (int i = 0; i < DATASIZE; i++) {
        if (rb1.contains(data[i]))
            ++nHits;
    }
    // Expect about 5 false positives, more than 100 means
    // something is definitely broken.
    BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
    BOOST_CHECK(nHits < 100);

    // last-1000-entry, 0.1% false positive:
    CRollingBloomFilter rb2(1000, 0.001, 0);
    for (int i = 0; i < DATASIZE; i++) {
        rb2.insert(data[i]);
    }
    // ... room for all of them:
    for (int i = 0; i < DATASIZE; i++) {
        BOOST_CHECK(rb2.contains(data[i]));
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
@ -17,82 +17,64 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
class mrutester
|
||||
{
|
||||
private:
|
||||
mruset<int> mru;
|
||||
std::set<int> set;
|
||||
|
||||
public:
|
||||
mrutester() { mru.max_size(MAX_SIZE); }
|
||||
int size() const { return set.size(); }
|
||||
|
||||
void insert(int n)
|
||||
{
|
||||
mru.insert(n);
|
||||
set.insert(n);
|
||||
BOOST_CHECK(mru == set);
|
||||
}
|
||||
};
|
||||
|
||||
BOOST_FIXTURE_TEST_SUITE(mruset_tests, BasicTestingSetup)
|
||||
|
||||
// Test that an mruset behaves like a set, as long as no more than MAX_SIZE elements are in it
|
||||
BOOST_AUTO_TEST_CASE(mruset_like_set)
|
||||
BOOST_AUTO_TEST_CASE(mruset_test)
|
||||
{
|
||||
// The mruset being tested.
|
||||
mruset<int> mru(5000);
|
||||
|
||||
for (int nTest=0; nTest<NUM_TESTS; nTest++)
|
||||
{
|
||||
mrutester tester;
|
||||
while (tester.size() < MAX_SIZE)
|
||||
tester.insert(GetRandInt(2 * MAX_SIZE));
|
||||
// Run the test 10 times.
|
||||
for (int test = 0; test < 10; test++) {
|
||||
// Reset mru.
|
||||
mru.clear();
|
||||
|
||||
// A deque + set to simulate the mruset.
|
||||
std::deque<int> rep;
|
||||
std::set<int> all;
|
||||
|
||||
// Insert 10000 random integers below 15000.
|
||||
for (int j=0; j<10000; j++) {
|
||||
int add = GetRandInt(15000);
|
||||
mru.insert(add);
|
||||
|
||||
// Add the number to rep/all as well.
|
||||
if (all.count(add) == 0) {
|
||||
all.insert(add);
|
||||
rep.push_back(add);
|
||||
if (all.size() == 5001) {
|
||||
all.erase(rep.front());
|
||||
rep.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
// Do a full comparison between mru and the simulated mru every 1000 and every 5001 elements.
|
||||
if (j % 1000 == 0 || j % 5001 == 0) {
|
||||
mruset<int> mru2 = mru; // Also try making a copy
|
||||
|
||||
// Check that all elements that should be in there, are in there.
|
||||
BOOST_FOREACH(int x, rep) {
|
||||
BOOST_CHECK(mru.count(x));
|
||||
BOOST_CHECK(mru2.count(x));
|
||||
}
|
||||
|
||||
// Test that an mruset's size never exceeds its max_size
|
||||
BOOST_AUTO_TEST_CASE(mruset_limited_size)
|
||||
{
|
||||
for (int nTest=0; nTest<NUM_TESTS; nTest++)
|
||||
{
|
||||
mruset<int> mru(MAX_SIZE);
|
||||
for (int nAction=0; nAction<3*MAX_SIZE; nAction++)
|
||||
{
|
||||
int n = GetRandInt(2 * MAX_SIZE);
|
||||
mru.insert(n);
|
||||
BOOST_CHECK(mru.size() <= MAX_SIZE);
|
||||
// Check that all elements that are in there, should be in there.
|
||||
BOOST_FOREACH(int x, mru) {
|
||||
BOOST_CHECK(all.count(x));
|
||||
}
|
||||
|
||||
// Check that all elements that are in there, should be in there.
|
||||
BOOST_FOREACH(int x, mru2) {
|
||||
BOOST_CHECK(all.count(x));
|
||||
}
|
||||
|
||||
for (int t = 0; t < 10; t++) {
|
||||
int r = GetRandInt(15000);
|
||||
BOOST_CHECK(all.count(r) == mru.count(r));
|
||||
BOOST_CHECK(all.count(r) == mru2.count(r));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 16-bit permutation function: XORs together the table words selected by
// the low 16 bits of n (higher bits are ignored).
int static permute(int n)
{
    // hexadecimals of pi; verified to be linearly independent
    static const int table[16] = {0x243F, 0x6A88, 0x85A3, 0x08D3, 0x1319, 0x8A2E, 0x0370, 0x7344,
                                  0xA409, 0x3822, 0x299F, 0x31D0, 0x082E, 0xFA98, 0xEC4E, 0x6C89};

    // Work on an unsigned copy of the low 16 bits so shifting is well defined.
    unsigned int remaining = static_cast<unsigned int>(n) & 0xFFFFu;

    int ret = 0;
    for (int idx = 0; remaining != 0; ++idx, remaining >>= 1) {
        if (remaining & 1u)
            ret ^= table[idx];
    }
    return ret;
}
|
||||
|
||||
// Test that an mruset acts like a moving window, if no duplicate elements are added
|
||||
BOOST_AUTO_TEST_CASE(mruset_window)
|
||||
{
|
||||
mruset<int> mru(MAX_SIZE);
|
||||
for (int n=0; n<10*MAX_SIZE; n++)
|
||||
{
|
||||
mru.insert(permute(n));
|
||||
|
||||
set<int> tester;
|
||||
for (int m=max(0,n-MAX_SIZE+1); m<=n; m++)
|
||||
tester.insert(permute(m));
|
||||
|
||||
BOOST_CHECK(mru == tester);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user