mirror of
https://github.com/dashpay/dash.git
synced 2024-12-25 20:12:57 +01:00
Merge #15250: Use RdSeed when available, and reduce RdRand load
1435fabc19d2143187efb493cbe23225eaf851ae Use RdSeed when available, and reduce RdRand load (Pieter Wuille)

Pull request description:

  This introduces support for autodetecting and using the RdSeed instruction on x86/x86_64 systems.

  In addition:
  * In SeedFast, only 64 bits of entropy are generated through RdRand (256 was relatively slow).
  * In SeedStartup, 256 bits of entropy are generated, using RdSeed (preferably) or RdRand (otherwise).

Tree-SHA512: fb7d3e22e93e14592f4b07282aa79d7c3cc4e9debdd9978580b8d2562bbad345e289bf3f80de2c50c9b50b8bac2aa9b838f9f272f7f8d43f1efc0913aa8acce3
This commit is contained in:
parent
c3bd1dc640
commit
5167cbe911
180
src/random.cpp
180
src/random.cpp
@ -78,25 +78,122 @@ static inline int64_t GetPerformanceCounter() noexcept
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
|
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
|
||||||
static bool rdrand_supported = false;
|
static bool g_rdrand_supported = false;
|
||||||
|
static bool g_rdseed_supported = false;
|
||||||
static constexpr uint32_t CPUID_F1_ECX_RDRAND = 0x40000000;
|
static constexpr uint32_t CPUID_F1_ECX_RDRAND = 0x40000000;
|
||||||
|
static constexpr uint32_t CPUID_F7_EBX_RDSEED = 0x00040000;
|
||||||
|
#ifdef bit_RDRND
|
||||||
|
static_assert(CPUID_F1_ECX_RDRAND == bit_RDRND, "Unexpected value for bit_RDRND");
|
||||||
|
#endif
|
||||||
|
#ifdef bit_RDSEED
|
||||||
|
static_assert(CPUID_F7_EBX_RDSEED == bit_RDSEED, "Unexpected value for bit_RDSEED");
|
||||||
|
#endif
|
||||||
|
static void inline GetCPUID(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
|
||||||
|
{
|
||||||
|
// We can't use __get_cpuid as it doesn't support subleafs.
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__cpuid_count(leaf, subleaf, a, b, c, d);
|
||||||
|
#else
|
||||||
|
__asm__ ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(leaf), "2"(subleaf));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static void InitHardwareRand()
|
static void InitHardwareRand()
|
||||||
{
|
{
|
||||||
uint32_t eax, ebx, ecx, edx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & CPUID_F1_ECX_RDRAND)) {
|
GetCPUID(1, 0, eax, ebx, ecx, edx);
|
||||||
rdrand_supported = true;
|
if (ecx & CPUID_F1_ECX_RDRAND) {
|
||||||
|
g_rdrand_supported = true;
|
||||||
|
}
|
||||||
|
GetCPUID(7, 0, eax, ebx, ecx, edx);
|
||||||
|
if (ebx & CPUID_F7_EBX_RDSEED) {
|
||||||
|
g_rdseed_supported = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReportHardwareRand()
|
static void ReportHardwareRand()
|
||||||
{
|
{
|
||||||
if (rdrand_supported) {
|
// This must be done in a separate function, as HWRandInit() may be indirectly called
|
||||||
// This must be done in a separate function, as HWRandInit() may be indirectly called
|
// from global constructors, before logging is initialized.
|
||||||
// from global constructors, before logging is initialized.
|
if (g_rdseed_supported) {
|
||||||
|
LogPrintf("Using RdSeed as additional entropy source\n");
|
||||||
|
}
|
||||||
|
if (g_rdrand_supported) {
|
||||||
LogPrintf("Using RdRand as an additional entropy source\n");
|
LogPrintf("Using RdRand as an additional entropy source\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Read 64 bits of entropy using rdrand.
|
||||||
|
*
|
||||||
|
* Must only be called when RdRand is supported.
|
||||||
|
*/
|
||||||
|
static uint64_t GetRdRand() noexcept
|
||||||
|
{
|
||||||
|
// RdRand may very rarely fail. Invoke it up to 10 times in a loop to reduce this risk.
|
||||||
|
#ifdef __i386__
|
||||||
|
uint8_t ok;
|
||||||
|
// Initialize to 0 to silence a compiler warning that r1 or r2 may be used
|
||||||
|
// uninitialized. Even if rdrand fails (!ok) it will set the output to 0,
|
||||||
|
// but there is no way that the compiler could know that.
|
||||||
|
uint32_t r1 = 0, r2 = 0;
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %eax
|
||||||
|
if (ok) break;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdrand %eax
|
||||||
|
if (ok) break;
|
||||||
|
}
|
||||||
|
return (((uint64_t)r2) << 32) | r1;
|
||||||
|
#elif defined(__x86_64__) || defined(__amd64__)
|
||||||
|
uint8_t ok;
|
||||||
|
uint64_t r1 = 0; // See above why we initialize to 0.
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %rax
|
||||||
|
if (ok) break;
|
||||||
|
}
|
||||||
|
return r1;
|
||||||
|
#else
|
||||||
|
#error "RdRand is only supported on x86 and x86_64"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Read 64 bits of entropy using rdseed.
|
||||||
|
*
|
||||||
|
* Must only be called when RdSeed is supported.
|
||||||
|
*/
|
||||||
|
static uint64_t GetRdSeed() noexcept
|
||||||
|
{
|
||||||
|
// RdSeed may fail when the HW RNG is overloaded. Loop indefinitely until enough entropy is gathered,
|
||||||
|
// but pause after every failure.
|
||||||
|
#ifdef __i386__
|
||||||
|
uint8_t ok;
|
||||||
|
uint32_t r1, r2;
|
||||||
|
do {
|
||||||
|
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %eax
|
||||||
|
if (ok) break;
|
||||||
|
__asm__ volatile ("pause");
|
||||||
|
} while(true);
|
||||||
|
do {
|
||||||
|
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdseed %eax
|
||||||
|
if (ok) break;
|
||||||
|
__asm__ volatile ("pause");
|
||||||
|
} while(true);
|
||||||
|
return (((uint64_t)r2) << 32) | r1;
|
||||||
|
#elif defined(__x86_64__) || defined(__amd64__)
|
||||||
|
uint8_t ok;
|
||||||
|
uint64_t r1;
|
||||||
|
do {
|
||||||
|
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %rax
|
||||||
|
if (ok) break;
|
||||||
|
__asm__ volatile ("pause");
|
||||||
|
} while(true);
|
||||||
|
return r1;
|
||||||
|
#else
|
||||||
|
#error "RdSeed is only supported on x86 and x86_64"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/* Access to other hardware random number generators could be added here later,
|
/* Access to other hardware random number generators could be added here later,
|
||||||
* assuming it is sufficiently fast (in the order of a few hundred CPU cycles).
|
* assuming it is sufficiently fast (in the order of a few hundred CPU cycles).
|
||||||
@ -107,43 +204,40 @@ static void InitHardwareRand() {}
|
|||||||
static void ReportHardwareRand() {}
|
static void ReportHardwareRand() {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static bool GetHardwareRand(unsigned char* ent32) noexcept {
|
/** Add 64 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
|
||||||
|
static void SeedHardwareFast(CSHA512& hasher) noexcept {
|
||||||
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
|
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
|
||||||
if (rdrand_supported) {
|
if (g_rdrand_supported) {
|
||||||
uint8_t ok;
|
uint64_t out = GetRdRand();
|
||||||
// Not all assemblers support the rdrand instruction, write it in hex.
|
hasher.Write((const unsigned char*)&out, sizeof(out));
|
||||||
#ifdef __i386__
|
return;
|
||||||
for (int iter = 0; iter < 4; ++iter) {
|
}
|
||||||
// Initialize to 0 to silence a compiler warning that r1 or r2 may be used
|
#endif
|
||||||
// uninitialized. Even if rdrand fails (!ok) it will set the output to 0,
|
}
|
||||||
// but there is no way that the compiler could know that.
|
|
||||||
uint32_t r1 = 0, r2 = 0;
|
/** Add 256 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
|
||||||
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0;" // rdrand %eax
|
static void SeedHardwareSlow(CSHA512& hasher) noexcept {
|
||||||
".byte 0x0f, 0xc7, 0xf2;" // rdrand %edx
|
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
|
||||||
"setc %2" :
|
// When we want 256 bits of entropy, prefer RdSeed over RdRand, as it's
|
||||||
"=a"(r1), "=d"(r2), "=q"(ok) :: "cc");
|
// guaranteed to produce independent randomness on every call.
|
||||||
if (!ok) return false;
|
if (g_rdseed_supported) {
|
||||||
WriteLE32(ent32 + 8 * iter, r1);
|
for (int i = 0; i < 4; ++i) {
|
||||||
WriteLE32(ent32 + 8 * iter + 4, r2);
|
uint64_t out = GetRdSeed();
|
||||||
}
|
hasher.Write((const unsigned char*)&out, sizeof(out));
|
||||||
#else
|
}
|
||||||
uint64_t r1 = 0, r2 = 0, r3 = 0, r4 = 0; // See above why we initialize to 0.
|
return;
|
||||||
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0, " // rdrand %rax
|
}
|
||||||
"0x48, 0x0f, 0xc7, 0xf3, " // rdrand %rbx
|
// When falling back to RdRand, XOR the result of 1024 results.
|
||||||
"0x48, 0x0f, 0xc7, 0xf1, " // rdrand %rcx
|
// This guarantees a reseeding occurs between each.
|
||||||
"0x48, 0x0f, 0xc7, 0xf2; " // rdrand %rdx
|
if (g_rdrand_supported) {
|
||||||
"setc %4" :
|
for (int i = 0; i < 4; ++i) {
|
||||||
"=a"(r1), "=b"(r2), "=c"(r3), "=d"(r4), "=q"(ok) :: "cc");
|
uint64_t out = 0;
|
||||||
if (!ok) return false;
|
for (int j = 0; j < 1024; ++j) out ^= GetRdRand();
|
||||||
WriteLE64(ent32, r1);
|
hasher.Write((const unsigned char*)&out, sizeof(out));
|
||||||
WriteLE64(ent32 + 8, r2);
|
}
|
||||||
WriteLE64(ent32 + 16, r3);
|
return;
|
||||||
WriteLE64(ent32 + 24, r4);
|
|
||||||
#endif
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Use repeated SHA512 to strengthen the randomness in seed32, and feed into hasher. */
|
/** Use repeated SHA512 to strengthen the randomness in seed32, and feed into hasher. */
|
||||||
@ -431,8 +525,7 @@ static void SeedFast(CSHA512& hasher) noexcept
|
|||||||
hasher.Write((const unsigned char*)&ptr, sizeof(ptr));
|
hasher.Write((const unsigned char*)&ptr, sizeof(ptr));
|
||||||
|
|
||||||
// Hardware randomness is very fast when available; use it always.
|
// Hardware randomness is very fast when available; use it always.
|
||||||
bool have_hw_rand = GetHardwareRand(buffer);
|
SeedHardwareFast(hasher);
|
||||||
if (have_hw_rand) hasher.Write(buffer, sizeof(buffer));
|
|
||||||
|
|
||||||
// High-precision timestamp
|
// High-precision timestamp
|
||||||
SeedTimestamp(hasher);
|
SeedTimestamp(hasher);
|
||||||
@ -503,6 +596,9 @@ static void SeedStartup(CSHA512& hasher, RNGState& rng) noexcept
|
|||||||
RAND_screen();
|
RAND_screen();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Gather 256 bits of hardware randomness, if available
|
||||||
|
SeedHardwareSlow(hasher);
|
||||||
|
|
||||||
// Everything that the 'slow' seeder includes.
|
// Everything that the 'slow' seeder includes.
|
||||||
SeedSlow(hasher);
|
SeedSlow(hasher);
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
* perform 'fast' seeding, consisting of mixing in:
|
* perform 'fast' seeding, consisting of mixing in:
|
||||||
* - A stack pointer (indirectly committing to calling thread and call stack)
|
* - A stack pointer (indirectly committing to calling thread and call stack)
|
||||||
* - A high-precision timestamp (rdtsc when available, c++ high_resolution_clock otherwise)
|
* - A high-precision timestamp (rdtsc when available, c++ high_resolution_clock otherwise)
|
||||||
* - Hardware RNG (rdrand) when available.
|
* - 64 bits from the hardware RNG (rdrand) when available.
|
||||||
* These entropy sources are very fast, and only designed to protect against situations
|
* These entropy sources are very fast, and only designed to protect against situations
|
||||||
* where a VM state restore/copy results in multiple systems with the same randomness.
|
* where a VM state restore/copy results in multiple systems with the same randomness.
|
||||||
* FastRandomContext on the other hand does not protect against this once created, but
|
* FastRandomContext on the other hand does not protect against this once created, but
|
||||||
@ -50,6 +50,7 @@
|
|||||||
*
|
*
|
||||||
* On first use of the RNG (regardless of what function is called first), all entropy
|
* On first use of the RNG (regardless of what function is called first), all entropy
|
||||||
* sources used in the 'slow' seeder are included, but also:
|
* sources used in the 'slow' seeder are included, but also:
|
||||||
|
* - 256 bits from the hardware RNG (rdseed or rdrand) when available.
|
||||||
* - (On Windows) Performance monitoring data from the OS.
|
* - (On Windows) Performance monitoring data from the OS.
|
||||||
* - (On Windows) Through OpenSSL, the screen contents.
|
* - (On Windows) Through OpenSSL, the screen contents.
|
||||||
* - Strengthen the entropy for 100 ms using repeated SHA512.
|
* - Strengthen the entropy for 100 ms using repeated SHA512.
|
||||||
|
Loading…
Reference in New Issue
Block a user