mirror of https://github.com/dashpay/dash.git (synced 2024-12-25 12:02:48 +01:00)

merge bitcoin#23994: Consolidate all uses of the fast range mapping technique in util/fastrange.h

parent 1cf996ac8c
commit 772a3affce
src/Makefile.am
@@ -310,6 +310,7 @@ BITCOIN_CORE_H = \
   util/check.h \
   util/enumerate.h \
   util/error.h \
+  util/fastrange.h \
   util/fees.h \
   util/golombrice.h \
   util/hash_type.h \
src/blockfilter.cpp
@@ -29,7 +29,7 @@ uint64_t GCSFilter::HashToRange(const Element& element) const
     uint64_t hash = CSipHasher(m_params.m_siphash_k0, m_params.m_siphash_k1)
                         .Write(element.data(), element.size())
                         .Finalize();
-    return MapIntoRange(hash, m_F);
+    return FastRange64(hash, m_F);
 }
 
 std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const
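Note (reviewer sketch, not part of the commit): HashToRange maps a 64-bit SipHash output into [0, m_F), where m_F is the filter's hash range (for BIP158-style filters, the element count N times the inverse false-positive parameter M). A minimal standalone illustration, assuming a compiler that provides unsigned __int128; the hash value, N, and M below are hypothetical stand-ins for the real construction parameters:

    #include <cstdint>

    // Stand-in for util/fastrange.h's FastRange64 (128-bit path).
    static inline uint64_t FastRange64(uint64_t x, uint64_t n)
    {
        return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
    }

    int main()
    {
        const uint64_t hash = 0x9E3779B97F4A7C15ULL; // hypothetical SipHash output
        const uint64_t F = 100 * 784931;             // e.g. N = 100, M = 784931 (BIP158)
        return FastRange64(hash, F) < F ? 0 : 1;     // the result always lands in [0, F)
    }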
src/bloom.cpp
@@ -14,6 +14,7 @@
 #include <script/standard.h>
 #include <random.h>
 #include <streams.h>
+#include <util/fastrange.h>
 
 #include <math.h>
 #include <stdlib.h>
@@ -307,14 +308,6 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
     return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
 }
 
-
-// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
-// which should be the case for a good hash.
-// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-static inline uint32_t FastMod(uint32_t x, size_t n) {
-    return ((uint64_t)x * (uint64_t)n) >> 32;
-}
-
 void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
 {
     if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -339,7 +332,7 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
         uint32_t h = RollingBloomHash(n, nTweak, vKey);
         int bit = h & 0x3F;
         /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
-        uint32_t pos = FastMod(h, data.size());
+        uint32_t pos = FastRange32(h, data.size());
         /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
         data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
         data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -357,7 +350,7 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
     for (int n = 0; n < nHashFuncs; n++) {
         uint32_t h = RollingBloomHash(n, nTweak, vKey);
         int bit = h & 0x3F;
-        uint32_t pos = FastMod(h, data.size());
+        uint32_t pos = FastRange32(h, data.size());
         /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
         if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
             return false;
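Note (illustrative sketch, not dash code): FastRange32 computes exactly what the removed FastMod did, (uint64_t)x * n >> 32, replacing the division implied by h % data.size() with a single multiply and shift. A self-contained check that the result always stays in range:

    // Demonstrates that the multiply-shift reduction lands in [0, n)
    // just like h % n, while avoiding the division instruction.
    #include <cassert>
    #include <cstdint>

    static inline uint32_t FastRange32(uint32_t x, uint32_t n)
    {
        return (uint64_t{x} * n) >> 32;
    }

    int main()
    {
        const uint32_t n = 1000; // e.g. data.size() in CRollingBloomFilter
        for (uint64_t x = 0; x < (1ULL << 32); x += 65537) {
            uint32_t pos = FastRange32(static_cast<uint32_t>(x), n);
            assert(pos < n); // always in range, no % needed
        }
        return 0;
    }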
src/cuckoocache.h
@@ -5,6 +5,8 @@
 #ifndef BITCOIN_CUCKOOCACHE_H
 #define BITCOIN_CUCKOOCACHE_H
 
+#include <util/fastrange.h>
+
 #include <array>
 #include <algorithm> // std::find
 #include <atomic>
@@ -219,13 +221,8 @@ private:
      * One option would be to implement the same trick the compiler uses and compute the
      * constants for exact division based on the size, as described in "{N}-bit Unsigned
      * Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
-     * somewhat complicated and the result is still slower than other options:
-     *
-     * Instead we treat the 32-bit random number as a Q32 fixed-point number in the range
-     * [0, 1) and simply multiply it by the size. Then we just shift the result down by
-     * 32-bits to get our bucket number. The result has non-uniformity the same as a
-     * mod, but it is much faster to compute. More about this technique can be found at
-     * http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ .
+     * somewhat complicated and the result is still slower than an even simpler option:
+     * see the FastRange32 function in util/fastrange.h.
      *
      * The resulting non-uniformity is also more equally distributed which would be
      * advantageous for something like linear probing, though it shouldn't matter
@@ -241,14 +238,14 @@ private:
      */
     inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
     {
-        return {{(uint32_t)(((uint64_t)hash_function.template operator()<0>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<1>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<2>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<3>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<4>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<5>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<6>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<7>(e) * (uint64_t)size) >> 32)}};
+        return {{FastRange32(hash_function.template operator()<0>(e), size),
+                 FastRange32(hash_function.template operator()<1>(e), size),
+                 FastRange32(hash_function.template operator()<2>(e), size),
+                 FastRange32(hash_function.template operator()<3>(e), size),
+                 FastRange32(hash_function.template operator()<4>(e), size),
+                 FastRange32(hash_function.template operator()<5>(e), size),
+                 FastRange32(hash_function.template operator()<6>(e), size),
+                 FastRange32(hash_function.template operator()<7>(e), size)}};
     }
 
     /** invalid returns a special index that can never be inserted to
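Note (worked example, values mine): the comment above describes the multiply-shift as Q32 fixed-point arithmetic. A 32-bit hash h is read as the fraction h / 2^32 in [0, 1), so the bucket is floor(h / 2^32 * size), computed as (uint64_t(h) * size) >> 32 — exactly FastRange32. For instance:

    // Q32 fixed-point view: 0xC0000000 / 2^32 == 0.75, so with a
    // hypothetical table of 8 buckets the hash maps to bucket 6.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint32_t size = 8;        // hypothetical table size
        const uint32_t h = 0xC0000000u; // == 0.75 in Q32 fixed point
        uint32_t bucket = (uint64_t{h} * size) >> 32;
        std::printf("bucket = %u\n", bucket); // prints 6 (0.75 * 8)
        return 0;
    }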
src/test/fuzz/golomb_rice.cpp
@@ -25,7 +25,7 @@ uint64_t HashToRange(const std::vector<uint8_t>& element, const uint64_t f)
     const uint64_t hash = CSipHasher(0x0706050403020100ULL, 0x0F0E0D0C0B0A0908ULL)
                               .Write(element.data(), element.size())
                               .Finalize();
-    return MapIntoRange(hash, f);
+    return FastRange64(hash, f);
 }
 
 std::vector<uint64_t> BuildHashedSet(const std::unordered_set<std::vector<uint8_t>, ByteVectorHash>& elements, const uint64_t f)
src/util/fastrange.h (new file, 51 lines)
@@ -0,0 +1,51 @@
+// Copyright (c) 2018-2020 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_UTIL_FASTRANGE_H
+#define BITCOIN_UTIL_FASTRANGE_H
+
+#include <cstdint>
+
+/* This file offers implementations of the fast range reduction technique described
+ * in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+ *
+ * In short, they take an integer x and a range n, and return the upper bits of
+ * (x * n). If x is uniformly distributed over its domain, the result is as close to
+ * uniformly distributed over [0, n) as (x mod n) would be, but significantly faster.
+ */
+
+/** Fast range reduction with 32-bit input and 32-bit range. */
+static inline uint32_t FastRange32(uint32_t x, uint32_t n)
+{
+    return (uint64_t{x} * n) >> 32;
+}
+
+/** Fast range reduction with 64-bit input and 64-bit range. */
+static inline uint64_t FastRange64(uint64_t x, uint64_t n)
+{
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
+#else
+    // To perform the calculation on 64-bit numbers without losing the
+    // result to overflow, split the numbers into the most significant and
+    // least significant 32 bits and perform multiplication piece-wise.
+    //
+    // See: https://stackoverflow.com/a/26855440
+    const uint64_t x_hi = x >> 32;
+    const uint64_t x_lo = x & 0xFFFFFFFF;
+    const uint64_t n_hi = n >> 32;
+    const uint64_t n_lo = n & 0xFFFFFFFF;
+
+    const uint64_t ac = x_hi * n_hi;
+    const uint64_t ad = x_hi * n_lo;
+    const uint64_t bc = x_lo * n_hi;
+    const uint64_t bd = x_lo * n_lo;
+
+    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
+    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
+    return upper64;
+#endif
+}
+
+#endif // BITCOIN_UTIL_FASTRANGE_H
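Note (reviewer sanity check, not part of the commit): the portable #else branch computes the high 64 bits of the 128-bit product x * n piece-wise from 32-bit halves. A quick sketch comparing it against the __int128 path, assuming a compiler that provides unsigned __int128; the test values are arbitrary:

    #include <cassert>
    #include <cstdint>

    // Copy of the portable branch of FastRange64 above.
    static uint64_t FastRange64Portable(uint64_t x, uint64_t n)
    {
        const uint64_t x_hi = x >> 32, x_lo = x & 0xFFFFFFFF;
        const uint64_t n_hi = n >> 32, n_lo = n & 0xFFFFFFFF;
        const uint64_t ac = x_hi * n_hi, ad = x_hi * n_lo;
        const uint64_t bc = x_lo * n_hi, bd = x_lo * n_lo;
        const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
        return ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
    }

    int main()
    {
        const uint64_t xs[] = {0, 1, 0x123456789ABCDEFULL, ~0ULL};
        const uint64_t ns[] = {1, 1000, 784931, ~0ULL};
        for (uint64_t x : xs) {
            for (uint64_t n : ns) {
                // High 64 bits of the full 128-bit product.
                uint64_t wide = (static_cast<unsigned __int128>(x) *
                                 static_cast<unsigned __int128>(n)) >> 64;
                assert(FastRange64Portable(x, n) == wide);
            }
        }
        return 0;
    }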
src/util/golombrice.h
@@ -5,6 +5,8 @@
 #ifndef BITCOIN_UTIL_GOLOMBRICE_H
 #define BITCOIN_UTIL_GOLOMBRICE_H
 
+#include <util/fastrange.h>
+
 #include <streams.h>
 
 #include <cstdint>
@@ -40,35 +42,4 @@ uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
     return (q << P) + r;
 }
 
-// Map a value x that is uniformly distributed in the range [0, 2^64) to a
-// value uniformly distributed in [0, n) by returning the upper 64 bits of
-// x * n.
-//
-// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-static inline uint64_t MapIntoRange(uint64_t x, uint64_t n)
-{
-#ifdef __SIZEOF_INT128__
-    return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
-#else
-    // To perform the calculation on 64-bit numbers without losing the
-    // result to overflow, split the numbers into the most significant and
-    // least significant 32 bits and perform multiplication piece-wise.
-    //
-    // See: https://stackoverflow.com/a/26855440
-    const uint64_t x_hi = x >> 32;
-    const uint64_t x_lo = x & 0xFFFFFFFF;
-    const uint64_t n_hi = n >> 32;
-    const uint64_t n_lo = n & 0xFFFFFFFF;
-
-    const uint64_t ac = x_hi * n_hi;
-    const uint64_t ad = x_hi * n_lo;
-    const uint64_t bc = x_lo * n_hi;
-    const uint64_t bd = x_lo * n_lo;
-
-    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
-    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
-    return upper64;
-#endif
-}
-
 #endif // BITCOIN_UTIL_GOLOMBRICE_H