Internal RNG for approximateBestSubset to prevent degenerate behavior.

This fixes test_bitcoin failures on openbsd reported by dhill on IRC.

  On some systems rand() is a simple LCG over 2^31 and so it produces
an even-odd sequence.  ApproximateBestSubset was only using the least
significant bit and so every run of the iterative solver would be the
same for some inputs, resulting in some pretty dumb decisions.

Using something other than the least significant bit would paper over
the issue but who knows what other way a system's rand() might get us
here.  Instead we use an internal RNG with a period of something like
2^60 which is well behaved.  This also makes it possible to make the
selection deterministic for the tests, if we wanted to implement that.
This commit is contained in:
Gregory Maxwell 2013-02-15 15:27:57 -08:00
parent 2f0fa79db2
commit 907a2aa4c7
4 changed files with 111 additions and 13 deletions

View File

@ -261,4 +261,66 @@ BOOST_AUTO_TEST_CASE(util_IsHex)
BOOST_CHECK(!IsHex("0x0000")); BOOST_CHECK(!IsHex("0x0000"));
} }
BOOST_AUTO_TEST_CASE(util_seed_insecure_rand)
{
// Expected results for the determinstic seed.
const uint32_t exp_vals[11] = { 91632771U,1889679809U,3842137544U,3256031132U,
1761911779U, 489223532U,2692793790U,2737472863U,
2796262275U,1309899767U,840571781U};
// Expected 0s in rand()%(idx+2) for the determinstic seed.
const int exp_count[9] = {5013,3346,2415,1972,1644,1386,1176,1096,1009};
int i;
int count=0;
seed_insecure_rand();
//Does the non-determistic rand give us results that look too like the determinstic one?
for (i=0;i<10;i++)
{
int match = 0;
uint32_t rval = insecure_rand();
for (int j=0;j<11;j++)match |= rval==exp_vals[j];
count += match;
}
// sum(binomial(10,i)*(11/(2^32))^i*(1-(11/(2^32)))^(10-i),i,0,4) ~= 1-1/2^134.73
// So _very_ unlikely to throw a false failure here.
BOOST_CHECK(count<=4);
for (int mod=2;mod<11;mod++)
{
int mask = 1;
// Really rough binomal confidence approximation.
int err = 30*10000./mod*sqrt((1./mod*(1-1./mod))/10000.);
//mask is 2^ceil(log2(mod))-1
while(mask<mod-1)mask=(mask<<1)+1;
count = 0;
//How often does it get a zero from the uniform range [0,mod)?
for (i=0;i<10000;i++)
{
uint32_t rval;
do{
rval=insecure_rand()&mask;
}while(rval>=(uint32_t)mod);
count += rval==0;
}
BOOST_CHECK(count<=10000/mod+err);
BOOST_CHECK(count>=10000/mod-err);
}
seed_insecure_rand(true);
for (i=0;i<11;i++)
{
BOOST_CHECK_EQUAL(insecure_rand(),exp_vals[i]);
}
for (int mod=2;mod<11;mod++)
{
count = 0;
for (i=0;i<10000;i++) count += insecure_rand()%mod==0;
BOOST_CHECK_EQUAL(count,exp_count[mod-2]);
}
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()

View File

@ -1276,12 +1276,26 @@ void AddTimeData(const CNetAddr& ip, int64 nTime)
} }
} }
uint32_t insecure_rand_Rz = 11;
uint32_t insecure_rand_Rw = 11;
void seed_insecure_rand(bool fDeterministic)
{
//The seed values have some unlikely fixed points which we avoid.
if(fDeterministic)
{
insecure_rand_Rz = insecure_rand_Rw = 11;
} else {
uint32_t tmp;
do{
RAND_bytes((unsigned char*)&tmp,4);
}while(tmp==0 || tmp==0x9068ffffU);
insecure_rand_Rz=tmp;
do{
RAND_bytes((unsigned char*)&tmp,4);
}while(tmp==0 || tmp==0x464fffffU);
insecure_rand_Rw=tmp;
}
}
string FormatVersion(int nVersion) string FormatVersion(int nVersion)
{ {

View File

@ -403,13 +403,27 @@ bool SoftSetArg(const std::string& strArg, const std::string& strValue);
*/ */
bool SoftSetBoolArg(const std::string& strArg, bool fValue); bool SoftSetBoolArg(const std::string& strArg, bool fValue);
/**
* MWC RNG of George Marsaglia
* This is intended to be fast. It has a period of 2^59.3, though the
* least significant 16 bits only have a period of about 2^30.1.
*
* @return random value
*/
extern uint32_t insecure_rand_Rz;
extern uint32_t insecure_rand_Rw;
static inline uint32_t insecure_rand(void)
{
insecure_rand_Rz=36969*(insecure_rand_Rz&65535)+(insecure_rand_Rz>>16);
insecure_rand_Rw=18000*(insecure_rand_Rw&65535)+(insecure_rand_Rw>>16);
return (insecure_rand_Rw<<16)+insecure_rand_Rz;
}
/**
* Seed insecure_rand using the random pool.
* @param Deterministic Use a determinstic seed
*/
void seed_insecure_rand(bool fDeterministic=false);
/** Median filter over a stream of values. /** Median filter over a stream of values.
* Returns the median of the last N numbers * Returns the median of the last N numbers

View File

@ -973,6 +973,8 @@ static void ApproximateBestSubset(vector<pair<int64, pair<const CWalletTx*,unsig
vfBest.assign(vValue.size(), true); vfBest.assign(vValue.size(), true);
nBest = nTotalLower; nBest = nTotalLower;
seed_insecure_rand();
for (int nRep = 0; nRep < iterations && nBest != nTargetValue; nRep++) for (int nRep = 0; nRep < iterations && nBest != nTargetValue; nRep++)
{ {
vfIncluded.assign(vValue.size(), false); vfIncluded.assign(vValue.size(), false);
@ -982,7 +984,13 @@ static void ApproximateBestSubset(vector<pair<int64, pair<const CWalletTx*,unsig
{ {
for (unsigned int i = 0; i < vValue.size(); i++) for (unsigned int i = 0; i < vValue.size(); i++)
{ {
if (nPass == 0 ? rand() % 2 : !vfIncluded[i]) //The solver here uses a randomized algorithm,
//the randomness serves no real security purpose but is just
//needed to prevent degenerate behavior and it is important
//that the rng fast. We do not use a constant random sequence,
//because there may be some privacy improvement by making
//the selection random.
if (nPass == 0 ? insecure_rand()&1 : !vfIncluded[i])
{ {
nTotal += vValue[i].first; nTotal += vValue[i].first;
vfIncluded[i] = true; vfIncluded[i] = true;