diff --git a/configure.ac b/configure.ac index 0e0e61a926..e5b43a7673 100644 --- a/configure.ac +++ b/configure.ac @@ -798,6 +798,9 @@ if test x$use_lcov_branch != xno; then AC_SUBST(LCOV_OPTS, "$LCOV_OPTS --rc lcov_branch_coverage=1") fi +dnl Check for __int128 +AC_CHECK_TYPES([__int128]) + dnl Check for endianness AC_C_BIGENDIAN diff --git a/src/Makefile.am b/src/Makefile.am index 944468d728..40d14a6042 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -527,6 +527,8 @@ crypto_libdash_crypto_base_a_SOURCES = \ crypto/hmac_sha256.h \ crypto/hmac_sha512.cpp \ crypto/hmac_sha512.h \ + crypto/muhash.h \ + crypto/muhash.cpp \ crypto/poly1305.h \ crypto/poly1305.cpp \ crypto/pkcs5_pbkdf2_hmac_sha512.cpp \ diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index ece3ed3a90..13ce77c8ba 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -244,6 +245,54 @@ static void FastRandom_1bit(benchmark::Bench& bench) }); } +static void MuHash(benchmark::Bench& bench) +{ + MuHash3072 acc; + unsigned char key[32] = {0}; + int i = 0; + bench.run([&] { + key[0] = ++i; + acc *= MuHash3072(key); + }); +} + +static void MuHashMul(benchmark::Bench& bench) +{ + MuHash3072 acc; + FastRandomContext rng(true); + MuHash3072 muhash{rng.randbytes(32)}; + + bench.run([&] { + acc *= muhash; + }); +} + +static void MuHashDiv(benchmark::Bench& bench) +{ + MuHash3072 acc; + FastRandomContext rng(true); + MuHash3072 muhash{rng.randbytes(32)}; + + for (size_t i = 0; i < bench.epochIterations(); ++i) { + acc *= muhash; + } + + bench.run([&] { + acc /= muhash; + }); +} + +static void MuHashPrecompute(benchmark::Bench& bench) +{ + MuHash3072 acc; + FastRandomContext rng(true); + std::vector key{rng.randbytes(32)}; + + bench.run([&] { + MuHash3072{key}; + }); +} + BENCHMARK(HASH_1MB_DSHA256); BENCHMARK(HASH_1MB_RIPEMD160); BENCHMARK(HASH_1MB_SHA1); @@ -272,3 +321,8 @@ BENCHMARK(HASH_SHA256D64_1024); BENCHMARK(FastRandom_32bit); BENCHMARK(FastRandom_1bit); + +BENCHMARK(MuHash); +BENCHMARK(MuHashMul); +BENCHMARK(MuHashDiv); +BENCHMARK(MuHashPrecompute); diff --git a/src/crypto/muhash.cpp b/src/crypto/muhash.cpp new file mode 100644 index 0000000000..e5a0d4cb9c --- /dev/null +++ b/src/crypto/muhash.cpp @@ -0,0 +1,346 @@ +// Copyright (c) 2017-2020 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include +#include + +#include +#include +#include + +namespace { + +using limb_t = Num3072::limb_t; +using double_limb_t = Num3072::double_limb_t; +constexpr int LIMB_SIZE = Num3072::LIMB_SIZE; +/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */ +constexpr limb_t MAX_PRIME_DIFF = 1103717; + +/** Extract the lowest limb of [c0,c1,c2] into n, and left shift the number by 1 limb. */ +inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n) +{ + n = c0; + c0 = c1; + c1 = c2; + c2 = 0; +} + +/** [c0,c1] = a * b */ +inline void mul(limb_t& c0, limb_t& c1, const limb_t& a, const limb_t& b) +{ + double_limb_t t = (double_limb_t)a * b; + c1 = t >> LIMB_SIZE; + c0 = t; +} + +/* [c0,c1,c2] += n * [d0,d1,d2]. c2 is 0 initially */ +inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2, const limb_t& n) +{ + double_limb_t t = (double_limb_t)d0 * n + c0; + c0 = t; + t >>= LIMB_SIZE; + t += (double_limb_t)d1 * n + c1; + c1 = t; + t >>= LIMB_SIZE; + c2 = t + d2 * n; +} + +/* [c0,c1] *= n */ +inline void muln2(limb_t& c0, limb_t& c1, const limb_t& n) +{ + double_limb_t t = (double_limb_t)c0 * n; + c0 = t; + t >>= LIMB_SIZE; + t += (double_limb_t)c1 * n; + c1 = t; +} + +/** [c0,c1,c2] += a * b */ +inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b) +{ + double_limb_t t = (double_limb_t)a * b; + limb_t th = t >> LIMB_SIZE; + limb_t tl = t; + + c0 += tl; + th += (c0 < tl) ? 1 : 0; + c1 += th; + c2 += (c1 < th) ? 1 : 0; +} + +/** [c0,c1,c2] += 2 * a * b */ +inline void muldbladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b) +{ + double_limb_t t = (double_limb_t)a * b; + limb_t th = t >> LIMB_SIZE; + limb_t tl = t; + + c0 += tl; + limb_t tt = th + ((c0 < tl) ? 1 : 0); + c1 += tt; + c2 += (c1 < tt) ? 1 : 0; + c0 += tl; + th += (c0 < tl) ? 1 : 0; + c1 += th; + c2 += (c1 < th) ? 1 : 0; +} + +/** + * Add limb a to [c0,c1]: [c0,c1] += a. Then extract the lowest + * limb of [c0,c1] into n, and left shift the number by 1 limb. + * */ +inline void addnextract2(limb_t& c0, limb_t& c1, const limb_t& a, limb_t& n) +{ + limb_t c2 = 0; + + // add + c0 += a; + if (c0 < a) { + c1 += 1; + + // Handle case when c1 has overflown + if (c1 == 0) + c2 = 1; + } + + // extract + n = c0; + c0 = c1; + c1 = c2; +} + +/** in_out = in_out^(2^sq) * mul */ +inline void square_n_mul(Num3072& in_out, const int sq, const Num3072& mul) +{ + for (int j = 0; j < sq; ++j) in_out.Square(); + in_out.Multiply(mul); +} + +} // namespace + +/** Indicates whether d is larger than the modulus. */ +bool Num3072::IsOverflow() const +{ + if (this->limbs[0] <= std::numeric_limits::max() - MAX_PRIME_DIFF) return false; + for (int i = 1; i < LIMBS; ++i) { + if (this->limbs[i] != std::numeric_limits::max()) return false; + } + return true; +} + +void Num3072::FullReduce() +{ + limb_t c0 = MAX_PRIME_DIFF; + limb_t c1 = 0; + for (int i = 0; i < LIMBS; ++i) { + addnextract2(c0, c1, this->limbs[i], this->limbs[i]); + } +} + +Num3072 Num3072::GetInverse() const +{ + // For fast exponentiation a sliding window exponentiation with repunit + // precomputation is utilized. See "Fast Point Decompression for Standard + // Elliptic Curves" (Brumley, Järvinen, 2008). + + Num3072 p[12]; // p[i] = a^(2^(2^i)-1) + Num3072 out; + + p[0] = *this; + + for (int i = 0; i < 11; ++i) { + p[i + 1] = p[i]; + for (int j = 0; j < (1 << i); ++j) p[i + 1].Square(); + p[i + 1].Multiply(p[i]); + } + + out = p[11]; + + square_n_mul(out, 512, p[9]); + square_n_mul(out, 256, p[8]); + square_n_mul(out, 128, p[7]); + square_n_mul(out, 64, p[6]); + square_n_mul(out, 32, p[5]); + square_n_mul(out, 8, p[3]); + square_n_mul(out, 2, p[1]); + square_n_mul(out, 1, p[0]); + square_n_mul(out, 5, p[2]); + square_n_mul(out, 3, p[0]); + square_n_mul(out, 2, p[0]); + square_n_mul(out, 4, p[0]); + square_n_mul(out, 4, p[1]); + square_n_mul(out, 3, p[0]); + + return out; +} + +void Num3072::Multiply(const Num3072& a) +{ + limb_t c0 = 0, c1 = 0, c2 = 0; + Num3072 tmp; + + /* Compute limbs 0..N-2 of this*a into tmp, including one reduction. */ + for (int j = 0; j < LIMBS - 1; ++j) { + limb_t d0 = 0, d1 = 0, d2 = 0; + mul(d0, d1, this->limbs[1 + j], a.limbs[LIMBS + j - (1 + j)]); + for (int i = 2 + j; i < LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.limbs[LIMBS + j - i]); + mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF); + for (int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[j - i]); + extract3(c0, c1, c2, tmp.limbs[j]); + } + + /* Compute limb N-1 of a*b into tmp. */ + assert(c2 == 0); + for (int i = 0; i < LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[LIMBS - 1 - i]); + extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]); + + /* Perform a second reduction. */ + muln2(c0, c1, MAX_PRIME_DIFF); + for (int j = 0; j < LIMBS; ++j) { + addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]); + } + + assert(c1 == 0); + assert(c0 == 0 || c0 == 1); + + /* Perform up to two more reductions if the internal state has already + * overflown the MAX of Num3072 or if it is larger than the modulus or + * if both are the case. + * */ + if (this->IsOverflow()) this->FullReduce(); + if (c0) this->FullReduce(); +} + +void Num3072::Square() +{ + limb_t c0 = 0, c1 = 0, c2 = 0; + Num3072 tmp; + + /* Compute limbs 0..N-2 of this*this into tmp, including one reduction. */ + for (int j = 0; j < LIMBS - 1; ++j) { + limb_t d0 = 0, d1 = 0, d2 = 0; + for (int i = 0; i < (LIMBS - 1 - j) / 2; ++i) muldbladd3(d0, d1, d2, this->limbs[i + j + 1], this->limbs[LIMBS - 1 - i]); + if ((j + 1) & 1) muladd3(d0, d1, d2, this->limbs[(LIMBS - 1 - j) / 2 + j + 1], this->limbs[LIMBS - 1 - (LIMBS - 1 - j) / 2]); + mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF); + for (int i = 0; i < (j + 1) / 2; ++i) muldbladd3(c0, c1, c2, this->limbs[i], this->limbs[j - i]); + if ((j + 1) & 1) muladd3(c0, c1, c2, this->limbs[(j + 1) / 2], this->limbs[j - (j + 1) / 2]); + extract3(c0, c1, c2, tmp.limbs[j]); + } + + assert(c2 == 0); + for (int i = 0; i < LIMBS / 2; ++i) muldbladd3(c0, c1, c2, this->limbs[i], this->limbs[LIMBS - 1 - i]); + extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]); + + /* Perform a second reduction. */ + muln2(c0, c1, MAX_PRIME_DIFF); + for (int j = 0; j < LIMBS; ++j) { + addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]); + } + + assert(c1 == 0); + assert(c0 == 0 || c0 == 1); + + /* Perform up to two more reductions if the internal state has already + * overflown the MAX of Num3072 or if it is larger than the modulus or + * if both are the case. + * */ + if (this->IsOverflow()) this->FullReduce(); + if (c0) this->FullReduce(); +} + +void Num3072::SetToOne() +{ + this->limbs[0] = 1; + for (int i = 1; i < LIMBS; ++i) this->limbs[i] = 0; +} + +void Num3072::Divide(const Num3072& a) +{ + if (this->IsOverflow()) this->FullReduce(); + + Num3072 inv{}; + if (a.IsOverflow()) { + Num3072 b = a; + b.FullReduce(); + inv = b.GetInverse(); + } else { + inv = a.GetInverse(); + } + + this->Multiply(inv); + if (this->IsOverflow()) this->FullReduce(); +} + +Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) { + for (int i = 0; i < LIMBS; ++i) { + if (sizeof(limb_t) == 4) { + this->limbs[i] = ReadLE32(data + 4 * i); + } else if (sizeof(limb_t) == 8) { + this->limbs[i] = ReadLE64(data + 8 * i); + } + } +} + +void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) { + for (int i = 0; i < LIMBS; ++i) { + if (sizeof(limb_t) == 4) { + WriteLE32(out + i * 4, this->limbs[i]); + } else if (sizeof(limb_t) == 8) { + WriteLE64(out + i * 8, this->limbs[i]); + } + } +} + +Num3072 MuHash3072::ToNum3072(Span in) { + unsigned char tmp[Num3072::BYTE_SIZE]; + + uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256(); + ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, Num3072::BYTE_SIZE); + Num3072 out{tmp}; + + return out; +} + +MuHash3072::MuHash3072(Span in) noexcept +{ + m_numerator = ToNum3072(in); +} + +void MuHash3072::Finalize(uint256& out) noexcept +{ + m_numerator.Divide(m_denominator); + m_denominator.SetToOne(); // Needed to keep the MuHash object valid + + unsigned char data[Num3072::BYTE_SIZE]; + m_numerator.ToBytes(data); + + out = (CHashWriter(SER_DISK, 0) << data).GetSHA256(); +} + +MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept +{ + m_numerator.Multiply(mul.m_numerator); + m_denominator.Multiply(mul.m_denominator); + return *this; +} + +MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept +{ + m_numerator.Multiply(div.m_denominator); + m_denominator.Multiply(div.m_numerator); + return *this; +} + +MuHash3072& MuHash3072::Insert(Span in) noexcept { + m_numerator.Multiply(ToNum3072(in)); + return *this; +} + +MuHash3072& MuHash3072::Remove(Span in) noexcept { + m_numerator.Divide(ToNum3072(in)); + return *this; +} diff --git a/src/crypto/muhash.h b/src/crypto/muhash.h new file mode 100644 index 0000000000..64a9220862 --- /dev/null +++ b/src/crypto/muhash.h @@ -0,0 +1,131 @@ +// Copyright (c) 2017-2020 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_MUHASH_H +#define BITCOIN_CRYPTO_MUHASH_H + +#if defined(HAVE_CONFIG_H) +#include +#endif + +#include +#include + +#include + +class Num3072 +{ +private: + void FullReduce(); + bool IsOverflow() const; + Num3072 GetInverse() const; + +public: + static constexpr size_t BYTE_SIZE = 384; + +#ifdef HAVE___INT128 + typedef unsigned __int128 double_limb_t; + typedef uint64_t limb_t; + static constexpr int LIMBS = 48; + static constexpr int LIMB_SIZE = 64; +#else + typedef uint64_t double_limb_t; + typedef uint32_t limb_t; + static constexpr int LIMBS = 96; + static constexpr int LIMB_SIZE = 32; +#endif + limb_t limbs[LIMBS]; + + // Sanity check for Num3072 constants + static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits"); + static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t"); + static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect"); + + // Hard coded values in MuHash3072 constructor and Finalize + static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t"); + + void Multiply(const Num3072& a); + void Divide(const Num3072& a); + void SetToOne(); + void Square(); + void ToBytes(unsigned char (&out)[BYTE_SIZE]); + + Num3072() { this->SetToOne(); }; + Num3072(const unsigned char (&data)[BYTE_SIZE]); + + SERIALIZE_METHODS(Num3072, obj) + { + for (auto& limb : obj.limbs) { + READWRITE(limb); + } + } +}; + +/** A class representing MuHash sets + * + * MuHash is a hashing algorithm that supports adding set elements in any + * order but also deleting in any order. As a result, it can maintain a + * running sum for a set of data as a whole, and add/remove when data + * is added to or removed from it. A downside of MuHash is that computing + * an inverse is relatively expensive. This is solved by representing + * the running value as a fraction, and multiplying added elements into + * the numerator and removed elements into the denominator. Only when the + * final hash is desired, a single modular inverse and multiplication is + * needed to combine the two. The combination is also run on serialization + * to allow for space-efficient storage on disk. + * + * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can + * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that + * all of this is perfectly parallellizable: each thread can process an + * arbitrary subset of the update operations, allowing them to be + * efficiently combined later. + * + * MuHash does not support checking if an element is already part of the + * set. That is why this class does not enforce the use of a set as the + * data it represents because there is no efficient way to do so. + * It is possible to add elements more than once and also to remove + * elements that have not been added before. However, this implementation + * is intended to represent a set of elements. + * + * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and + * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html. + */ +class MuHash3072 +{ +private: + Num3072 m_numerator; + Num3072 m_denominator; + + Num3072 ToNum3072(Span in); + +public: + /* The empty set. */ + MuHash3072() noexcept {}; + + /* A singleton with variable sized data in it. */ + explicit MuHash3072(Span in) noexcept; + + /* Insert a single piece of data into the set. */ + MuHash3072& Insert(Span in) noexcept; + + /* Remove a single piece of data from the set. */ + MuHash3072& Remove(Span in) noexcept; + + /* Multiply (resulting in a hash for the union of the sets) */ + MuHash3072& operator*=(const MuHash3072& mul) noexcept; + + /* Divide (resulting in a hash for the difference of the sets) */ + MuHash3072& operator/=(const MuHash3072& div) noexcept; + + /* Finalize into a 32-byte hash. Does not change this object's value. */ + void Finalize(uint256& out) noexcept; + + SERIALIZE_METHODS(MuHash3072, obj) + { + READWRITE(obj.m_numerator); + READWRITE(obj.m_denominator); + } +}; + +#endif // BITCOIN_CRYPTO_MUHASH_H diff --git a/src/hash.cpp b/src/hash.cpp index b9d1ef76ad..89491ad9f3 100644 --- a/src/hash.cpp +++ b/src/hash.cpp @@ -74,3 +74,10 @@ void BIP32Hash(const ChainCode &chainCode, unsigned int nChild, unsigned char he WriteBE32(num, nChild); CHMAC_SHA512(chainCode.begin(), chainCode.size()).Write(&header, 1).Write(data, 32).Write(num, 4).Finalize(output); } + +uint256 SHA256Uint256(const uint256& input) +{ + uint256 result; + CSHA256().Write(input.begin(), 32).Finalize(result.begin()); + return result; +} diff --git a/src/hash.h b/src/hash.h index dd6499fdb5..fcdb33f14f 100644 --- a/src/hash.h +++ b/src/hash.h @@ -7,6 +7,7 @@ #ifndef BITCOIN_HASH_H #define BITCOIN_HASH_H +#include #include #include #include @@ -187,7 +188,7 @@ inline uint160 Hash160(const prevector& vch) class CHashWriter { private: - CHash256 ctx; + CSHA256 ctx; const int nType; const int nVersion; @@ -199,13 +200,27 @@ public: int GetVersion() const { return nVersion; } void write(const char *pch, size_t size) { - ctx.Write({(const unsigned char*)pch, size}); + ctx.Write((const unsigned char*)pch, size); } - // invalidates the object + /** Compute the double-SHA256 hash of all data written to this object. + * + * Invalidates this object. + */ uint256 GetHash() { uint256 result; - ctx.Finalize(result); + ctx.Finalize(result.begin()); + ctx.Reset().Write(result.begin(), CSHA256::OUTPUT_SIZE).Finalize(result.begin()); + return result; + } + + /** Compute the SHA256 hash of all data written to this object. + * + * Invalidates this object. + */ + uint256 GetSHA256() { + uint256 result; + ctx.Finalize(result.begin()); return result; } @@ -213,9 +228,8 @@ public: * Returns the first 64 bits from the resulting hash. */ inline uint64_t GetCheapHash() { - unsigned char result[CHash256::OUTPUT_SIZE]; - ctx.Finalize(result); - return ReadLE64(result); + uint256 result = GetHash(); + return ReadLE64(result.begin()); } template @@ -270,6 +284,9 @@ uint256 SerializeHash(const T& obj, int nType=SER_GETHASH, int nVersion=PROTOCOL return ss.GetHash(); } +/** Single-SHA256 a 32-byte input (represented as uint256). */ +[[nodiscard]] uint256 SHA256Uint256(const uint256& input); + unsigned int MurmurHash3(unsigned int nHashSeed, Span vDataToHash); void BIP32Hash(const ChainCode &chainCode, unsigned int nChild, unsigned char header, const unsigned char data[32], unsigned char output[64]); diff --git a/src/init.cpp b/src/init.cpp index bf0ba04726..a1a8ce2693 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -986,7 +986,7 @@ void PeriodicStats() assert(gArgs.GetBoolArg("-statsenabled", DEFAULT_STATSD_ENABLE)); CCoinsStats stats; ::ChainstateActive().ForceFlushStateToDisk(); - if (WITH_LOCK(cs_main, return GetUTXOStats(&::ChainstateActive().CoinsDB(), stats))) { + if (WITH_LOCK(cs_main, return GetUTXOStats(&::ChainstateActive().CoinsDB(), stats, CoinStatsHashType::NONE))) { statsClient.gauge("utxoset.tx", stats.nTransactions, 1.0f); statsClient.gauge("utxoset.txOutputs", stats.nTransactionOutputs, 1.0f); statsClient.gauge("utxoset.dbSizeBytes", stats.nDiskSize, 1.0f); diff --git a/src/node/coinstats.cpp b/src/node/coinstats.cpp index a2a9b920d6..33149e3f4d 100644 --- a/src/node/coinstats.cpp +++ b/src/node/coinstats.cpp @@ -6,49 +6,98 @@ #include #include +#include #include #include -#include #include // #include #include +#include #include #include +static uint64_t GetBogoSize(const CScript& scriptPubKey) +{ + return 32 /* txid */ + + 4 /* vout index */ + + 4 /* height + coinbase */ + + 8 /* amount */ + + 2 /* scriptPubKey len */ + + scriptPubKey.size() /* scriptPubKey */; +} -static void ApplyStats(CCoinsStats &stats, CHashWriter& ss, const uint256& hash, const std::map& outputs) +static void ApplyHash(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map& outputs, std::map::const_iterator it) +{ + if (it == outputs.begin()) { + ss << hash; + ss << VARINT(it->second.nHeight * 2 + it->second.fCoinBase ? 1u : 0u); + } + + ss << VARINT(it->first + 1); + ss << it->second.out.scriptPubKey; + ss << VARINT(it->second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED); + + if (it == std::prev(outputs.end())) { + ss << VARINT(0u); + } +} + +static void ApplyHash(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map& outputs, std::map::const_iterator it) {} + +static void ApplyHash(CCoinsStats& stats, MuHash3072& muhash, const uint256& hash, const std::map& outputs, std::map::const_iterator it) +{ + COutPoint outpoint = COutPoint(hash, it->first); + Coin coin = it->second; + + CDataStream ss(SER_DISK, PROTOCOL_VERSION); + ss << outpoint; + ss << static_cast(coin.nHeight * 2 + coin.fCoinBase); + ss << coin.out; + muhash.Insert(MakeUCharSpan(ss)); +} + +template +static void ApplyStats(CCoinsStats& stats, T& hash_obj, const uint256& hash, const std::map& outputs) +{ + assert(!outputs.empty()); + stats.nTransactions++; + for (auto it = outputs.begin(); it != outputs.end(); ++it) { + ApplyHash(stats, hash_obj, hash, outputs, it); + + stats.nTransactionOutputs++; + stats.nTotalAmount += it->second.out.nValue; + stats.nBogoSize += GetBogoSize(it->second.out.scriptPubKey); + } +} + +static void ApplyStats(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map& outputs) { assert(!outputs.empty()); - ss << hash; - ss << VARINT(outputs.begin()->second.nHeight * 2 + outputs.begin()->second.fCoinBase ? 1u : 0u); stats.nTransactions++; for (const auto& output : outputs) { - ss << VARINT(output.first + 1); - ss << output.second.out.scriptPubKey; - ss << VARINT(output.second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED); stats.nTransactionOutputs++; stats.nTotalAmount += output.second.out.nValue; - stats.nBogoSize += 32 /* txid */ + 4 /* vout index */ + 4 /* height + coinbase */ + 8 /* amount */ + - 2 /* scriptPubKey len */ + output.second.out.scriptPubKey.size() /* scriptPubKey */; + stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey); } - ss << VARINT(0u); } //! Calculate statistics about the unspent transaction output set -bool GetUTXOStats(CCoinsView *view, CCoinsStats &stats) +template +static bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, T hash_obj) { std::unique_ptr pcursor(view->Cursor()); assert(pcursor); - CHashWriter ss(SER_GETHASH, PROTOCOL_VERSION); stats.hashBlock = pcursor->GetBestBlock(); { LOCK(cs_main); stats.nHeight = LookupBlockIndex(stats.hashBlock)->nHeight; } - ss << stats.hashBlock; + + PrepareHash(hash_obj, stats); + uint256 prevkey; std::map outputs; while (pcursor->Valid()) { @@ -57,7 +106,7 @@ bool GetUTXOStats(CCoinsView *view, CCoinsStats &stats) Coin coin; if (pcursor->GetKey(key) && pcursor->GetValue(coin)) { if (!outputs.empty() && key.hash != prevkey) { - ApplyStats(stats, ss, prevkey, outputs); + ApplyStats(stats, hash_obj, prevkey, outputs); outputs.clear(); } prevkey = key.hash; @@ -68,9 +117,50 @@ bool GetUTXOStats(CCoinsView *view, CCoinsStats &stats) pcursor->Next(); } if (!outputs.empty()) { - ApplyStats(stats, ss, prevkey, outputs); + ApplyStats(stats, hash_obj, prevkey, outputs); } - stats.hashSerialized = ss.GetHash(); + + FinalizeHash(hash_obj, stats); + stats.nDiskSize = view->EstimateSize(); return true; } + +bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, CoinStatsHashType hash_type) +{ + switch (hash_type) { + case(CoinStatsHashType::HASH_SERIALIZED): { + CHashWriter ss(SER_GETHASH, PROTOCOL_VERSION); + return GetUTXOStats(view, stats, ss); + } + case(CoinStatsHashType::MUHASH): { + MuHash3072 muhash; + return GetUTXOStats(view, stats, muhash); + } + case(CoinStatsHashType::NONE): { + return GetUTXOStats(view, stats, nullptr); + } + } // no default case, so the compiler can warn about missing cases + assert(false); +} + +// The legacy hash serializes the hashBlock +static void PrepareHash(CHashWriter& ss, CCoinsStats& stats) +{ + ss << stats.hashBlock; +} +// MuHash does not need the prepare step +static void PrepareHash(MuHash3072& muhash, CCoinsStats& stats) {} +static void PrepareHash(std::nullptr_t, CCoinsStats& stats) {} + +static void FinalizeHash(CHashWriter& ss, CCoinsStats& stats) +{ + stats.hashSerialized = ss.GetHash(); +} +static void FinalizeHash(MuHash3072& muhash, CCoinsStats& stats) +{ + uint256 out; + muhash.Finalize(out); + stats.hashSerialized = out; +} +static void FinalizeHash(std::nullptr_t, CCoinsStats& stats) {} diff --git a/src/node/coinstats.h b/src/node/coinstats.h index 7c11aab8bd..7e24193cca 100644 --- a/src/node/coinstats.h +++ b/src/node/coinstats.h @@ -13,6 +13,12 @@ class CCoinsView; +enum class CoinStatsHashType { + HASH_SERIALIZED, + MUHASH, + NONE, +}; + struct CCoinsStats { int nHeight; @@ -28,6 +34,6 @@ struct CCoinsStats }; //! Calculate statistics about the unspent transaction output set -bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats); +bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, const CoinStatsHashType hash_type); #endif // BITCOIN_NODE_COINSTATS_H diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp index b1f6ccf43c..208fcefb3c 100644 --- a/src/rpc/blockchain.cpp +++ b/src/rpc/blockchain.cpp @@ -1236,14 +1236,29 @@ static UniValue pruneblockchain(const JSONRPCRequest& request) return uint64_t(block->nHeight); } +CoinStatsHashType ParseHashType(const std::string& hash_type_input) +{ + if (hash_type_input == "hash_serialized_2") { + return CoinStatsHashType::HASH_SERIALIZED; + } else if (hash_type_input == "muhash") { + return CoinStatsHashType::MUHASH; + } else if (hash_type_input == "none") { + return CoinStatsHashType::NONE; + } else { + throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%s is not a valid hash_type", hash_type_input)); + } +} + static UniValue gettxoutsetinfo(const JSONRPCRequest& request) { - if (request.fHelp || request.params.size() != 0) + if (request.fHelp || request.params.size() > 1) throw std::runtime_error( RPCHelpMan{"gettxoutsetinfo", "\nReturns statistics about the unspent transaction output set.\n" "Note this call may take some time.\n", - {}, + { + {"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'muhash', 'none'."}, + }, RPCResult{ RPCResult::Type::OBJ, "", "", { @@ -1252,7 +1267,8 @@ static UniValue gettxoutsetinfo(const JSONRPCRequest& request) {RPCResult::Type::NUM, "transactions", "The number of transactions with unspent outputs"}, {RPCResult::Type::NUM, "txouts", "The number of unspent transaction outputs"}, {RPCResult::Type::NUM, "bogosize", "A meaningless metric for UTXO set size"}, - {RPCResult::Type::STR_HEX, "hash_serialized_2", "The serialized hash"}, + {RPCResult::Type::STR_HEX, "hash_serialized_2", "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"}, + {RPCResult::Type::STR_HEX, "muhash", "The serialized hash (only present if 'muhash' hash_type is chosen)"}, {RPCResult::Type::NUM, "disk_size", "The estimated size of the chainstate on disk"}, {RPCResult::Type::STR_AMOUNT, "total_amount", "The total amount"}, }}, @@ -1267,14 +1283,21 @@ static UniValue gettxoutsetinfo(const JSONRPCRequest& request) CCoinsStats stats; ::ChainstateActive().ForceFlushStateToDisk(); + const CoinStatsHashType hash_type{request.params[0].isNull() ? CoinStatsHashType::HASH_SERIALIZED : ParseHashType(request.params[0].get_str())}; + CCoinsView* coins_view = WITH_LOCK(cs_main, return &ChainstateActive().CoinsDB()); - if (GetUTXOStats(coins_view, stats)) { + if (GetUTXOStats(coins_view, stats, hash_type)) { ret.pushKV("height", (int64_t)stats.nHeight); ret.pushKV("bestblock", stats.hashBlock.GetHex()); ret.pushKV("transactions", (int64_t)stats.nTransactions); ret.pushKV("txouts", (int64_t)stats.nTransactionOutputs); ret.pushKV("bogosize", (int64_t)stats.nBogoSize); - ret.pushKV("hash_serialized_2", stats.hashSerialized.GetHex()); + if (hash_type == CoinStatsHashType::HASH_SERIALIZED) { + ret.pushKV("hash_serialized_2", stats.hashSerialized.GetHex()); + } + if (hash_type == CoinStatsHashType::MUHASH) { + ret.pushKV("muhash", stats.hashSerialized.GetHex()); + } ret.pushKV("disk_size", stats.nDiskSize); ret.pushKV("total_amount", ValueFromAmount(stats.nTotalAmount)); } else { @@ -2754,7 +2777,7 @@ static const CRPCCommand commands[] = { "blockchain", "getrawmempool", &getrawmempool, {"verbose"} }, { "blockchain", "getspecialtxes", &getspecialtxes, {"blockhash", "type", "count", "skip", "verbosity"} }, { "blockchain", "gettxout", &gettxout, {"txid","n","include_mempool"} }, - { "blockchain", "gettxoutsetinfo", &gettxoutsetinfo, {} }, + { "blockchain", "gettxoutsetinfo", &gettxoutsetinfo, {"hash_type"} }, { "blockchain", "pruneblockchain", &pruneblockchain, {"height"} }, { "blockchain", "savemempool", &savemempool, {} }, { "blockchain", "verifychain", &verifychain, {"checklevel","nblocks"} }, diff --git a/src/rpc/util.h b/src/rpc/util.h index ef501db2d2..c3c725bbdf 100644 --- a/src/rpc/util.h +++ b/src/rpc/util.h @@ -5,6 +5,7 @@ #ifndef BITCOIN_RPC_UTIL_H #define BITCOIN_RPC_UTIL_H +#include #include #include #include diff --git a/src/script/interpreter.cpp b/src/script/interpreter.cpp index aac8520bf8..53e8ce415f 100644 --- a/src/script/interpreter.cpp +++ b/src/script/interpreter.cpp @@ -1404,34 +1404,37 @@ public: } }; +/** Compute the (single) SHA256 of the concatenation of all prevouts of a tx. */ template -uint256 GetPrevoutHash(const T& txTo) +uint256 GetPrevoutsSHA256(const T& txTo) { CHashWriter ss(SER_GETHASH, 0); for (const auto& txin : txTo.vin) { ss << txin.prevout; } - return ss.GetHash(); + return ss.GetSHA256(); } +/** Compute the (single) SHA256 of the concatenation of all nSequences of a tx. */ template -uint256 GetSequenceHash(const T& txTo) +uint256 GetSequencesSHA256(const T& txTo) { CHashWriter ss(SER_GETHASH, 0); for (const auto& txin : txTo.vin) { ss << txin.nSequence; } - return ss.GetHash(); + return ss.GetSHA256(); } +/** Compute the (single) SHA256 of the concatenation of all txouts of a tx. */ template -uint256 GetOutputsHash(const T& txTo) +uint256 GetOutputsSHA256(const T& txTo) { CHashWriter ss(SER_GETHASH, 0); for (const auto& txout : txTo.vout) { ss << txout; } - return ss.GetHash(); + return ss.GetSHA256(); } } // namespace @@ -1443,9 +1446,9 @@ void PrecomputedTransactionData::Init(const T& txTo, std::vector&& spent m_spent_outputs = std::move(spent_outputs); - hashPrevouts = GetPrevoutHash(txTo); - hashSequence = GetSequenceHash(txTo); - hashOutputs = GetOutputsHash(txTo); + hashPrevouts = SHA256Uint256(GetPrevoutsSHA256(txTo)); + hashSequence = SHA256Uint256(GetSequencesSHA256(txTo)); + hashOutputs = SHA256Uint256(GetOutputsSHA256(txTo)); m_ready = true; } diff --git a/src/test/crypto_tests.cpp b/src/test/crypto_tests.cpp index e2fdb7bce8..9b0c213a15 100644 --- a/src/test/crypto_tests.cpp +++ b/src/test/crypto_tests.cpp @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include #include @@ -880,4 +882,92 @@ BOOST_AUTO_TEST_CASE(sha3_256_tests) TestSHA3_256("72c57c359e10684d0517e46653a02d18d29eff803eb009e4d5eb9e95add9ad1a4ac1f38a70296f3a369a16985ca3c957de2084cdc9bdd8994eb59b8815e0debad4ec1f001feac089820db8becdaf896aaf95721e8674e5d476b43bd2b873a7d135cd685f545b438210f9319e4dcd55986c85303c1ddf18dc746fe63a409df0a998ed376eb683e16c09e6e9018504152b3e7628ef350659fb716e058a5263a18823d2f2f6ee6a8091945a48ae1c5cb1694cf2c1fe76ef9177953afe8899cfa2b7fe0603bfa3180937dadfb66fbbdd119bbf8063338aa4a699075a3bfdbae8db7e5211d0917e9665a702fc9b0a0a901d08bea97654162d82a9f05622b060b634244779c33427eb7a29353a5f48b07cbefa72f3622ac5900bef77b71d6b314296f304c8426f451f32049b1f6af156a9dab702e8907d3cd72bb2c50493f4d593e731b285b70c803b74825b3524cda3205a8897106615260ac93c01c5ec14f5b11127783989d1824527e99e04f6a340e827b559f24db9292fcdd354838f9339a5fa1d7f6b2087f04835828b13463dd40927866f16ae33ed501ec0e6c4e63948768c5aeea3e4f6754985954bea7d61088c44430204ef491b74a64bde1358cecb2cad28ee6a3de5b752ff6a051104d88478653339457ac45ba44cbb65f54d1969d047cda746931d5e6a8b48e211416aefd5729f3d60b56b54e7f85aa2f42de3cb69419240c24e67139a11790a709edef2ac52cf35dd0a08af45926ebe9761f498ff83bfe263d6897ee97943a4b982fe3404ef0b4a45e06113c60340e0664f14799bf59cb4b3934b465fabefd87155905ee5309ba41e9e402973311831ea600b16437f71df39ee77130490c4d0227e5d1757fdc66af3ae6b9953053ed9aafca0160209858a7d4dd38fe10e0cb153672d08633ed6c54977aa0a6e67f9ff2f8c9d22dd7b21de08192960fd0e0da68d77c8d810db11dcaa61c725cd4092cbff76c8e1debd8d0361bb3f2e607911d45716f53067bdc0d89dd4889177765166a424e9fc0cb711201099dda213355e6639ac7eb86eca2ae0ab38b7f674f37ef8a6fcca1a6f52f55d9e1dcd631d2c3c82bba129172feb991d5af51afecd9d61a88b6832e4107480e392aed61a8644f551665ebff6b20953b635737a4f895e429fddcfe801f606fbda74b3bf6f5767d0fac14907fcfd0aa1d4c11b9e91b01d68052399b51a29f1ae6acd965109977c14a555cbcbd21ad8cb9f8853506d4bc21c01e62d61d7b21be1b923be54914e6b0a7ca84dd11f1159193e1184568a6134a6bbadf5b4df986edcf2019390ae841cfaa44435e28ce877d3dae4177992fa5d4e5c005876dbe3d1e63bec7dcc0942762b48b1ecc6c1a918409a8a72812a1e245c0c67be6e729c2b49bc6ee4d24a8f63e78e75db45655c26a9a78aff36fcd67117f26b8f654dca664b9f0e30681874cb749e1a692720078856286c2560b0292cc837933423147569350955c9571bf8941ba128fd339cb4268f46b94bc6ee203eb7026813706ea51c4f24c91866fc23a724bf2501327e6ae89c29f8db315dc28d2c7c719514036367e018f4835f63fdecd71f9bdced7132b6c4f8b13c69a517026fcd3622d67cb632320d5e7308f78f4b7cea11f6291b137851dc6cd6366f2785c71c3f237f81a7658b2a8d512b61e0ad5a4710b7b124151689fcb2116063fbff7e9115fed7b93de834970b838e49f8f8ba5f1f874c354078b5810a55ae289a56da563f1da6cd80a3757d6073fa55e016e45ac6cec1f69d871c92fd0ae9670c74249045e6b464787f9504128736309fed205f8df4d90e332908581298d9c75a3fa36ab0c3c9272e62de53ab290c803d67b696fd615c260a47bffad16746f18ba1a10a061bacbea9369693b3c042eec36bed289d7d12e52bca8aa1c2dff88ca7816498d25626d0f1e106ebb0b4a12138e00f3df5b1c2f49d98b1756e69b641b7c6353d99dbff050f4d76842c6cf1c2a4b062fc8e6336fa689b7c9d5c6b4ab8c15a5c20e514ff070a602d85ae52fa7810c22f8eeffd34a095b93342144f7a98d024216b3d68ed7bea047517bfcd83ec83febd1ba0e5858e2bdc1d8b1f7b0f89e90ccc432a3f930cb8209462e64556c5054c56ca2a85f16b32eb83a10459d13516faa4d23302b7607b9bd38dab2239ac9e9440c314433fdfb3ceadab4b4f87415ed6f240e017221f3b5f7ac196cdf54957bec42fe6893994b46de3d27dc7fb58ca88feb5b9e79cf20053d12530ac524337b22a3629bea52f40b06d3e2128f32060f9105847daed81d35f20e2002817434659baff64494c5b5c7f9216bfda38412a0f70511159dc73bb6bae1f8eaa0ef08d99bcb31f94f6be12c29c83df45926430b366c99fca3270c15fc4056398fdf3135b7779e3066a006961d1ac0ad1c83179ce39e87a96b722ec23aabc065badf3e188347a360772ca6a447abac7e6a44f0d4632d52926332e44a0a86bff5ce699fd063bdda3ffd4c41b53ded49fecec67f40599b934e16e3fd1bc063ad7026f8d71bfd4cbaf56599586774723194b692036f1b6bb242e2ffb9c600b5215b412764599476ce475c9e5b396fbcebd6be323dcf4d0048077400aac7500db41dc95fc7f7edbe7c9c2ec5ea89943fe13b42217eef530bbd023671509e12dfce4e1c1c82955d965e6a68aa66f6967dba48feda572db1f099d9a6dc4bc8edade852b5e824a06890dc48a6a6510ecaf8cf7620d757290e3166d431abecc624fa9ac2234d2eb783308ead45544910c633a94964b2ef5fbc409cb8835ac4147d384e12e0a5e13951f7de0ee13eafcb0ca0c04946d7804040c0a3cd088352424b097adb7aad1ca4495952f3e6c0158c02d2bcec33bfda69301434a84d9027ce02c0b9725dad118", "d894b86261436362e64241e61f6b3e6589daf64dc641f60570c4c0bf3b1f2ca3"); } +static MuHash3072 FromInt(unsigned char i) { + unsigned char tmp[32] = {i, 0}; + return MuHash3072(tmp); +} + +BOOST_AUTO_TEST_CASE(muhash_tests) +{ + uint256 out; + + for (int iter = 0; iter < 10; ++iter) { + uint256 res; + int table[4]; + for (int i = 0; i < 4; ++i) { + table[i] = g_insecure_rand_ctx.randbits(3); + } + for (int order = 0; order < 4; ++order) { + MuHash3072 acc; + for (int i = 0; i < 4; ++i) { + int t = table[i ^ order]; + if (t & 4) { + acc /= FromInt(t & 3); + } else { + acc *= FromInt(t & 3); + } + } + acc.Finalize(out); + if (order == 0) { + res = out; + } else { + BOOST_CHECK(res == out); + } + } + + MuHash3072 x = FromInt(g_insecure_rand_ctx.randbits(4)); // x=X + MuHash3072 y = FromInt(g_insecure_rand_ctx.randbits(4)); // x=X, y=Y + MuHash3072 z; // x=X, y=Y, z=1 + z *= x; // x=X, y=Y, z=X + z *= y; // x=X, y=Y, z=X*Y + y *= x; // x=X, y=Y*X, z=X*Y + z /= y; // x=X, y=Y*X, z=1 + z.Finalize(out); + + uint256 out2; + MuHash3072 a; + a.Finalize(out2); + + BOOST_CHECK_EQUAL(out, out2); + } + + MuHash3072 acc = FromInt(0); + acc *= FromInt(1); + acc /= FromInt(2); + acc.Finalize(out); + BOOST_CHECK_EQUAL(out, uint256S("10d312b100cbd32ada024a6646e40d3482fcff103668d2625f10002a607d5863")); + + MuHash3072 acc2 = FromInt(0); + unsigned char tmp[32] = {1, 0}; + acc2.Insert(tmp); + unsigned char tmp2[32] = {2, 0}; + acc2.Remove(tmp2); + acc2.Finalize(out); + BOOST_CHECK_EQUAL(out, uint256S("10d312b100cbd32ada024a6646e40d3482fcff103668d2625f10002a607d5863")); + + // Test MuHash3072 serialization + MuHash3072 serchk = FromInt(1); serchk *= FromInt(2); + std::string ser_exp = "1fa093295ea30a6a3acdc7b3f770fa538eff537528e990e2910e40bbcfd7f6696b1256901929094694b56316de342f593303dd12ac43e06dce1be1ff8301c845beb15468fff0ef002dbf80c29f26e6452bccc91b5cb9437ad410d2a67ea847887fa3c6a6553309946880fe20db2c73fe0641adbd4e86edfee0d9f8cd0ee1230898873dc13ed8ddcaf045c80faa082774279007a2253f8922ee3ef361d378a6af3ddaf180b190ac97e556888c36b3d1fb1c85aab9ccd46e3deaeb7b7cf5db067a7e9ff86b658cf3acd6662bbcce37232daa753c48b794356c020090c831a8304416e2aa7ad633c0ddb2f11be1be316a81be7f7e472071c042cb68faef549c221ebff209273638b741aba5a81675c45a5fa92fea4ca821d7a324cb1e1a2ccd3b76c4228ec8066dad2a5df6e1bd0de45c7dd5de8070bdb46db6c554cf9aefc9b7b2bbf9f75b1864d9f95005314593905c0109b71f703d49944ae94477b51dac10a816bb6d1c700bafabc8bd86fac8df24be519a2f2836b16392e18036cb13e48c5c010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"; + CDataStream ss_chk(SER_DISK, PROTOCOL_VERSION); + ss_chk << serchk; + BOOST_CHECK_EQUAL(ser_exp, HexStr(ss_chk.str())); + + // Test MuHash3072 deserialization + MuHash3072 deserchk; + ss_chk >> deserchk; + uint256 out3; + serchk.Finalize(out); + deserchk.Finalize(out3); + BOOST_CHECK_EQUAL(HexStr(out), HexStr(out3)); + + // Test MuHash3072 overflow, meaning the internal data is larger than the modulus. + CDataStream ss_max(ParseHex("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"), SER_DISK, PROTOCOL_VERSION); + MuHash3072 overflowchk; + ss_max >> overflowchk; + + uint256 out4; + overflowchk.Finalize(out4); + BOOST_CHECK_EQUAL(HexStr(out4), "3a31e6903aff0de9f62f9a9f7f8b861de76ce2cda09822b90014319ae5dc2271"); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/test/functional/README.md b/test/functional/README.md index 8d5407c11f..0c4c03e9a8 100644 --- a/test/functional/README.md +++ b/test/functional/README.md @@ -118,9 +118,6 @@ Utilities for manipulating transaction scripts (originally from python-bitcoinli #### [test_framework/key.py](test_framework/key.py) Test-only secp256k1 elliptic curve implementation -#### [test_framework/bignum.py](test_framework/bignum.py) -Helpers for script.py - #### [test_framework/blocktools.py](test_framework/blocktools.py) Helper functions for creating blocks and transactions. diff --git a/test/functional/feature_utxo_set_hash.py b/test/functional/feature_utxo_set_hash.py new file mode 100755 index 0000000000..0fb3713c65 --- /dev/null +++ b/test/functional/feature_utxo_set_hash.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020-2021 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +"""Test UTXO set hash value calculation in gettxoutsetinfo.""" + +import struct + +from test_framework.blocktools import create_transaction +from test_framework.messages import ( + CBlock, + COutPoint, + FromHex, +) +from test_framework.muhash import MuHash3072 +from test_framework.test_framework import BitcoinTestFramework +from test_framework.util import assert_equal + +class UTXOSetHashTest(BitcoinTestFramework): + def set_test_params(self): + self.num_nodes = 1 + self.setup_clean_chain = True + + def skip_test_if_missing_module(self): + self.skip_if_no_wallet() + + def test_deterministic_hash_results(self): + self.log.info("Test deterministic UTXO set hash results") + + # These depend on the setup_clean_chain option, the chain loaded from the cache + assert_equal(self.nodes[0].gettxoutsetinfo()['hash_serialized_2'], "b61ee2cb582d2f4f94493f3d480e9a59d064706e98a12be0f335a3eeadd5678a") + assert_equal(self.nodes[0].gettxoutsetinfo("muhash")['muhash'], "dd5ad2a105c2d29495f577245c357409002329b9f4d6182c0af3dc2f462555c8") + + def test_muhash_implementation(self): + self.log.info("Test MuHash implementation consistency") + + node = self.nodes[0] + + # Generate 100 blocks and remove the first since we plan to spend its + # coinbase + block_hashes = node.generate(100) + blocks = list(map(lambda block: FromHex(CBlock(), node.getblock(block, False)), block_hashes)) + spending = blocks.pop(0) + + # Create a spending transaction and mine a block which includes it + tx = create_transaction(node, spending.vtx[0].rehash(), node.getnewaddress(), amount=49) + txid = node.sendrawtransaction(hexstring=tx.serialize().hex(), maxfeerate=0) + + tx_block = node.generateblock(node.getnewaddress(), [txid])['hash'] + blocks.append(FromHex(CBlock(), node.getblock(tx_block, False))) + + # Serialize the outputs that should be in the UTXO set and add them to + # a MuHash object + muhash = MuHash3072() + + for height, block in enumerate(blocks): + # The Genesis block coinbase is not part of the UTXO set and we + # spent the first mined block + height += 2 + + for tx in block.vtx: + for n, tx_out in enumerate(tx.vout): + coinbase = 1 if not tx.vin[0].prevout.hash else 0 + + # Skip witness commitment + if (coinbase and n > 0): + continue + + data = COutPoint(int(tx.rehash(), 16), n).serialize() + data += struct.pack(" 0: - s.append((v >> ((i-1) * 8)) & 0xff) - i -= 1 - return s - -def bn2mpi(v): - have_ext = False - if v.bit_length() > 0: - have_ext = (v.bit_length() & 0x07) == 0 - - neg = False - if v < 0: - neg = True - v = -v - - s = struct.pack(b">I", bn_bytes(v, have_ext)) - ext = bytearray() - if have_ext: - ext.append(0) - v_bin = bn2bin(v) - if neg: - if have_ext: - ext[0] |= 0x80 - else: - v_bin[0] |= 0x80 - return s + ext + v_bin - -# bitcoin-specific little endian format, with implicit size -def mpi2vch(s): - r = s[4:] # strip size - r = r[::-1] # reverse string, converting BE->LE - return r - -def bn2vch(v): - return bytes(mpi2vch(bn2mpi(v))) diff --git a/test/functional/test_framework/key.py b/test/functional/test_framework/key.py index 894f54efb6..02e731640a 100644 --- a/test/functional/test_framework/key.py +++ b/test/functional/test_framework/key.py @@ -8,22 +8,7 @@ keys, and is trivially vulnerable to side channel attacks. Do not use for anything but tests.""" import random -def modinv(a, n): - """Compute the modular inverse of a modulo n - - See https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Modular_integers. - """ - t1, t2 = 0, 1 - r1, r2 = n, a - while r2 != 0: - q = r1 // r2 - t1, t2 = t2, t1 - q * t2 - r1, r2 = r2, r1 - q * r2 - if r1 > 1: - return None - if t1 < 0: - t1 += n - return t1 +from .util import modinv def jacobi_symbol(n, k): """Compute the Jacobi symbol of n modulo k diff --git a/test/functional/test_framework/muhash.py b/test/functional/test_framework/muhash.py new file mode 100644 index 0000000000..183548f71f --- /dev/null +++ b/test/functional/test_framework/muhash.py @@ -0,0 +1,112 @@ +# Copyright (c) 2020 Pieter Wuille +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +"""Native Python MuHash3072 implementation.""" + +import hashlib +import unittest + +from .util import modinv + +def rot32(v, bits): + """Rotate the 32-bit value v left by bits bits.""" + bits %= 32 # Make sure the term below does not throw an exception + return ((v << bits) & 0xffffffff) | (v >> (32 - bits)) + +def chacha20_doubleround(s): + """Apply a ChaCha20 double round to 16-element state array s. + + See https://cr.yp.to/chacha/chacha-20080128.pdf and https://tools.ietf.org/html/rfc8439 + """ + QUARTER_ROUNDS = [(0, 4, 8, 12), + (1, 5, 9, 13), + (2, 6, 10, 14), + (3, 7, 11, 15), + (0, 5, 10, 15), + (1, 6, 11, 12), + (2, 7, 8, 13), + (3, 4, 9, 14)] + + for a, b, c, d in QUARTER_ROUNDS: + s[a] = (s[a] + s[b]) & 0xffffffff + s[d] = rot32(s[d] ^ s[a], 16) + s[c] = (s[c] + s[d]) & 0xffffffff + s[b] = rot32(s[b] ^ s[c], 12) + s[a] = (s[a] + s[b]) & 0xffffffff + s[d] = rot32(s[d] ^ s[a], 8) + s[c] = (s[c] + s[d]) & 0xffffffff + s[b] = rot32(s[b] ^ s[c], 7) + +def chacha20_32_to_384(key32): + """Specialized ChaCha20 implementation with 32-byte key, 0 IV, 384-byte output.""" + # See RFC 8439 section 2.3 for chacha20 parameters + CONSTANTS = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574] + + key_bytes = [0]*8 + for i in range(8): + key_bytes[i] = int.from_bytes(key32[(4 * i):(4 * (i+1))], 'little') + + INITIALIZATION_VECTOR = [0] * 4 + init = CONSTANTS + key_bytes + INITIALIZATION_VECTOR + out = bytearray() + for counter in range(6): + init[12] = counter + s = init.copy() + for _ in range(10): + chacha20_doubleround(s) + for i in range(16): + out.extend(((s[i] + init[i]) & 0xffffffff).to_bytes(4, 'little')) + return bytes(out) + +def data_to_num3072(data): + """Hash a 32-byte array data to a 3072-bit number using 6 Chacha20 operations.""" + bytes384 = chacha20_32_to_384(data) + return int.from_bytes(bytes384, 'little') + +class MuHash3072: + """Class representing the MuHash3072 computation of a set. + + See https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html + """ + + MODULUS = 2**3072 - 1103717 + + def __init__(self): + """Initialize for an empty set.""" + self.numerator = 1 + self.denominator = 1 + + def insert(self, data): + """Insert a byte array data in the set.""" + data_hash = hashlib.sha256(data).digest() + self.numerator = (self.numerator * data_to_num3072(data_hash)) % self.MODULUS + + def remove(self, data): + """Remove a byte array from the set.""" + data_hash = hashlib.sha256(data).digest() + self.denominator = (self.denominator * data_to_num3072(data_hash)) % self.MODULUS + + def digest(self): + """Extract the final hash. Does not modify this object.""" + val = (self.numerator * modinv(self.denominator, self.MODULUS)) % self.MODULUS + bytes384 = val.to_bytes(384, 'little') + return hashlib.sha256(bytes384).digest() + +class TestFrameworkMuhash(unittest.TestCase): + def test_muhash(self): + muhash = MuHash3072() + muhash.insert(b'\x00' * 32) + muhash.insert((b'\x01' + b'\x00' * 31)) + muhash.remove((b'\x02' + b'\x00' * 31)) + finalized = muhash.digest() + # This mirrors the result in the C++ MuHash3072 unit test + self.assertEqual(finalized[::-1].hex(), "10d312b100cbd32ada024a6646e40d3482fcff103668d2625f10002a607d5863") + + def test_chacha20(self): + def chacha_check(key, result): + self.assertEqual(chacha20_32_to_384(key)[:64].hex(), result) + + # Test vectors from https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7 + # Since the nonce is hardcoded to 0 in our function we only use those vectors. + chacha_check([0]*32, "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586") + chacha_check([0]*31 + [1], "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d41bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963") diff --git a/test/functional/test_framework/script.py b/test/functional/test_framework/script.py index 943b404deb..1f74d43b9f 100644 --- a/test/functional/test_framework/script.py +++ b/test/functional/test_framework/script.py @@ -6,20 +6,33 @@ This file is modified from python-bitcoinlib. """ - import hashlib import struct +import unittest -from .bignum import bn2vch -from .messages import CTransaction, CTxOut, sha256, hash256 +from .messages import ( + CTransaction, + CTxOut, + hash256, + sha256, +) MAX_SCRIPT_ELEMENT_SIZE = 520 - OPCODE_NAMES = {} def hash160(s): return hashlib.new('ripemd160', sha256(s)).digest() +def bn2vch(v): + """Convert number to bitcoin-specific little endian format.""" + # We need v.bit_length() bits, plus a sign bit for every nonzero number. + n_bits = v.bit_length() + (v != 0) + # The number of bytes for that is: + n_bytes = (n_bits + 7) // 8 + # Convert number to absolute value + sign in top bit. + encoded_v = 0 if v == 0 else abs(v) | ((v < 0) << (n_bytes * 8 - 1)) + # Serialize to bytes + return encoded_v.to_bytes(n_bytes, 'little') _opcode_instances = [] class CScriptOp(int): @@ -30,13 +43,13 @@ class CScriptOp(int): def encode_op_pushdata(d): """Encode a PUSHDATA op, returning bytes""" if len(d) < 0x4c: - return b'' + bytes([len(d)]) + d # OP_PUSHDATA + return b'' + bytes([len(d)]) + d # OP_PUSHDATA elif len(d) <= 0xff: - return b'\x4c' + bytes([len(d)]) + d # OP_PUSHDATA1 + return b'\x4c' + bytes([len(d)]) + d # OP_PUSHDATA1 elif len(d) <= 0xffff: - return b'\x4d' + struct.pack(b'= 0x80: # Mask for all but the highest result bit - num_mask = (2**(len(value)*8) - 1) >> 1 + num_mask = (2**(len(value) * 8) - 1) >> 1 result &= num_mask result *= -1 return result @@ -494,21 +507,20 @@ class CScript(bytes): pushdata_type = 'PUSHDATA2' if i + 1 >= len(self): raise CScriptInvalidError('PUSHDATA2: missing data length') - datasize = self[i] + (self[i+1] << 8) + datasize = self[i] + (self[i + 1] << 8) i += 2 elif opcode == OP_PUSHDATA4: pushdata_type = 'PUSHDATA4' if i + 3 >= len(self): raise CScriptInvalidError('PUSHDATA4: missing data length') - datasize = self[i] + (self[i+1] << 8) + (self[i+2] << 16) + (self[i+3] << 24) + datasize = self[i] + (self[i + 1] << 8) + (self[i + 2] << 16) + (self[i + 3] << 24) i += 4 else: - assert False # shouldn't happen + assert False # shouldn't happen - - data = bytes(self[i:i+datasize]) + data = bytes(self[i:i + datasize]) # Check for truncation if len(data) < datasize: @@ -658,3 +670,25 @@ def SignatureHash(script, txTo, inIdx, hashtype): hash = hash256(s) return (hash, None) + +class TestFrameworkScript(unittest.TestCase): + def test_bn2vch(self): + self.assertEqual(bn2vch(0), bytes([])) + self.assertEqual(bn2vch(1), bytes([0x01])) + self.assertEqual(bn2vch(-1), bytes([0x81])) + self.assertEqual(bn2vch(0x7F), bytes([0x7F])) + self.assertEqual(bn2vch(-0x7F), bytes([0xFF])) + self.assertEqual(bn2vch(0x80), bytes([0x80, 0x00])) + self.assertEqual(bn2vch(-0x80), bytes([0x80, 0x80])) + self.assertEqual(bn2vch(0xFF), bytes([0xFF, 0x00])) + self.assertEqual(bn2vch(-0xFF), bytes([0xFF, 0x80])) + self.assertEqual(bn2vch(0x100), bytes([0x00, 0x01])) + self.assertEqual(bn2vch(-0x100), bytes([0x00, 0x81])) + self.assertEqual(bn2vch(0x7FFF), bytes([0xFF, 0x7F])) + self.assertEqual(bn2vch(-0x8000), bytes([0x00, 0x80, 0x80])) + self.assertEqual(bn2vch(-0x7FFFFF), bytes([0xFF, 0xFF, 0xFF])) + self.assertEqual(bn2vch(0x80000000), bytes([0x00, 0x00, 0x00, 0x80, 0x00])) + self.assertEqual(bn2vch(-0x80000000), bytes([0x00, 0x00, 0x00, 0x80, 0x80])) + self.assertEqual(bn2vch(0xFFFFFFFF), bytes([0xFF, 0xFF, 0xFF, 0xFF, 0x00])) + self.assertEqual(bn2vch(123456789), bytes([0x15, 0xCD, 0x5B, 0x07])) + self.assertEqual(bn2vch(-54321), bytes([0x31, 0xD4, 0x80])) diff --git a/test/functional/test_framework/util.py b/test/functional/test_framework/util.py index 5e5dd0d703..1335b39eb8 100644 --- a/test/functional/test_framework/util.py +++ b/test/functional/test_framework/util.py @@ -18,6 +18,7 @@ import random import shutil import re import time +import unittest from . import coverage from .authproxy import AuthServiceProxy, JSONRPCException @@ -678,3 +679,33 @@ def find_vout_for_address(node, txid, addr): if any([addr == a for a in tx["vout"][i]["scriptPubKey"]["addresses"]]): return i raise RuntimeError("Vout not found for address: txid=%s, addr=%s" % (txid, addr)) + +def modinv(a, n): + """Compute the modular inverse of a modulo n using the extended Euclidean + Algorithm. See https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Modular_integers. + """ + # TODO: Change to pow(a, -1, n) available in Python 3.8 + t1, t2 = 0, 1 + r1, r2 = n, a + while r2 != 0: + q = r1 // r2 + t1, t2 = t2, t1 - q * t2 + r1, r2 = r2, r1 - q * r2 + if r1 > 1: + return None + if t1 < 0: + t1 += n + return t1 + +class TestFrameworkUtil(unittest.TestCase): + def test_modinv(self): + test_vectors = [ + [7, 11], + [11, 29], + [90, 13], + [1891, 3797], + [6003722857, 77695236973], + ] + + for a, n in test_vectors: + self.assertEqual(modinv(a, n), pow(a, n-2, n)) diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index 06e316421e..cfa299ab08 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -25,6 +25,7 @@ import subprocess import tempfile import re import logging +import unittest # Formatting. Default colors to empty strings. BOLD, GREEN, RED, GREY = ("", ""), ("", ""), ("", ""), ("", "") @@ -66,6 +67,12 @@ if os.name != 'nt' or sys.getwindowsversion() >= (10, 0, 14393): TEST_EXIT_PASSED = 0 TEST_EXIT_SKIPPED = 77 +TEST_FRAMEWORK_MODULES = [ + "muhash", + "script", + "util", +] + EXTENDED_SCRIPTS = [ # These tests are not run by default. # Longest test should go first, to favor running tests in parallel @@ -176,6 +183,7 @@ BASE_SCRIPTS = [ 'rpc_getblockfilter.py', 'rpc_invalidateblock.py', 'feature_txindex.py', + 'feature_utxo_set_hash.py', 'mempool_packages.py', 'mempool_package_onemore.py', 'feature_versionbits_warning.py', @@ -392,6 +400,16 @@ def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, enable_coverage= if os.path.isdir(cache_dir): print("%sWARNING!%s There is a cache directory here: %s. If tests fail unexpectedly, try deleting the cache directory." % (BOLD[1], BOLD[0], cache_dir)) + # Test Framework Tests + print("Running Unit Tests for Test Framework Modules") + test_framework_tests = unittest.TestSuite() + for module in TEST_FRAMEWORK_MODULES: + test_framework_tests.addTest(unittest.TestLoader().loadTestsFromName("test_framework.{}".format(module))) + result = unittest.TextTestRunner(verbosity=1, failfast=True).run(test_framework_tests) + if not result.wasSuccessful(): + logging.debug("Early exiting after failure in TestFramework unit tests") + sys.exit(False) + tests_dir = src_dir + '/test/functional/' flags = ['--cachedir={}'.format(cache_dir)] + args @@ -617,7 +635,7 @@ class TestResult(): def check_script_prefixes(): """Check that test scripts start with one of the allowed name prefixes.""" - good_prefixes_re = re.compile("(example|feature|interface|mempool|mining|p2p|rpc|wallet|tool)_") + good_prefixes_re = re.compile("^(example|feature|interface|mempool|mining|p2p|rpc|wallet|tool)_") bad_script_names = [script for script in ALL_SCRIPTS if good_prefixes_re.match(script) is None] if bad_script_names: