eece63fa72
This switches the Merkle tree logic for blocks to one that runs in constant (small) space. The old code is moved to tests, and a new test is added that for various combinations of block sizes, transaction positions to compute a branch for, and mutations: * Verifies that the old code and new code agree for the Merkle root. * Verifies that the old code and new code agree for the Merkle branch. * Verifies that the computed Merkle branch is valid. * Verifies that mutations don't change the Merkle root. * Verifies that mutations are correctly detected.
173 lines
7.0 KiB
C++
173 lines
7.0 KiB
C++
#include "merkle.h"
|
|
#include "hash.h"
|
|
#include "utilstrencodings.h"
|
|
|
|
/* WARNING! If you're reading this because you're learning about crypto
|
|
and/or designing a new system that will use merkle trees, keep in mind
|
|
that the following merkle tree algorithm has a serious flaw related to
|
|
duplicate txids, resulting in a vulnerability (CVE-2012-2459).
|
|
|
|
The reason is that if the number of hashes in the list at a given time
|
|
is odd, the last one is duplicated before computing the next level (which
|
|
is unusual in Merkle trees). This results in certain sequences of
|
|
transactions leading to the same merkle root. For example, these two
|
|
trees:
|
|
|
|
                A               A
              /  \            /   \
            B     C         B       C
           / \    |        / \     / \
          D   E   F       D   E   F   F
         / \ / \ / \     / \ / \ / \ / \
         1 2 3 4 5 6     1 2 3 4 5 6 5 6
|
|
|
|
for transaction lists [1,2,3,4,5,6] and [1,2,3,4,5,6,5,6] (where 5 and
|
|
6 are repeated) result in the same root hash A (because the hash of both
|
|
of (F) and (F,F) is C).
|
|
|
|
The vulnerability results from being able to send a block with such a
|
|
transaction list, with the same merkle root, and the same block hash as
|
|
the original without duplication, resulting in failed validation. If the
|
|
receiving node proceeds to mark that block as permanently invalid
|
|
however, it will fail to accept further unmodified (and thus potentially
|
|
valid) versions of the same block. We defend against this by detecting
|
|
the case where we would hash two identical hashes at the end of the list
|
|
together, and treating that identically to the block having an invalid
|
|
merkle root. Assuming no double-SHA256 collisions, this will detect all
|
|
known ways of changing the transactions without affecting the merkle
|
|
root.
|
|
*/
|
|
|
|
/* This implements a constant-space merkle root/path calculator, limited to 2^32 leaves. */
|
|
static void MerkleComputation(const std::vector<uint256>& leaves, uint256* proot, bool* pmutated, uint32_t branchpos, std::vector<uint256>* pbranch) {
|
|
if (pbranch) pbranch->clear();
|
|
if (leaves.size() == 0) {
|
|
if (pmutated) *pmutated = false;
|
|
if (proot) *proot = uint256();
|
|
return;
|
|
}
|
|
bool mutated = false;
|
|
// count is the number of leaves processed so far.
|
|
uint32_t count = 0;
|
|
// inner is an array of eagerly computed subtree hashes, indexed by tree
|
|
// level (0 being the leaves).
|
|
// For example, when count is 25 (11001 in binary), inner[4] is the hash of
|
|
// the first 16 leaves, inner[3] of the next 8 leaves, and inner[0] equal to
|
|
// the last leaf. The other inner entries are undefined.
|
|
uint256 inner[32];
|
|
// Which position in inner is a hash that depends on the matching leaf.
|
|
int matchlevel = -1;
|
|
// First process all leaves into 'inner' values.
|
|
while (count < leaves.size()) {
|
|
uint256 h = leaves[count];
|
|
bool matchh = count == branchpos;
|
|
count++;
|
|
int level;
|
|
// For each of the lower bits in count that are 0, do 1 step. Each
|
|
// corresponds to an inner value that existed before processing the
|
|
// current leaf, and each needs a hash to combine it.
|
|
for (level = 0; !(count & (((uint32_t)1) << level)); level++) {
|
|
if (pbranch) {
|
|
if (matchh) {
|
|
pbranch->push_back(inner[level]);
|
|
} else if (matchlevel == level) {
|
|
pbranch->push_back(h);
|
|
matchh = true;
|
|
}
|
|
}
|
|
mutated |= (inner[level] == h);
|
|
CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
|
|
}
|
|
// Store the resulting hash at inner position level.
|
|
inner[level] = h;
|
|
if (matchh) {
|
|
matchlevel = level;
|
|
}
|
|
}
|
|
// Do a final 'sweep' over the rightmost branch of the tree to process
|
|
// odd levels, and reduce everything to a single top value.
|
|
// Level is the level (counted from the bottom) up to which we've sweeped.
|
|
int level = 0;
|
|
// As long as bit number level in count is zero, skip it. It means there
|
|
// is nothing left at this level.
|
|
while (!(count & (((uint32_t)1) << level))) {
|
|
level++;
|
|
}
|
|
uint256 h = inner[level];
|
|
bool matchh = matchlevel == level;
|
|
while (count != (((uint32_t)1) << level)) {
|
|
// If we reach this point, h is an inner value that is not the top.
|
|
// We combine it with itself (Bitcoin's special rule for odd levels in
|
|
// the tree) to produce a higher level one.
|
|
if (pbranch && matchh) {
|
|
pbranch->push_back(h);
|
|
}
|
|
CHash256().Write(h.begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
|
|
// Increment count to the value it would have if two entries at this
|
|
// level had existed.
|
|
count += (((uint32_t)1) << level);
|
|
level++;
|
|
// And propagate the result upwards accordingly.
|
|
while (!(count & (((uint32_t)1) << level))) {
|
|
if (pbranch) {
|
|
if (matchh) {
|
|
pbranch->push_back(inner[level]);
|
|
} else if (matchlevel == level) {
|
|
pbranch->push_back(h);
|
|
matchh = true;
|
|
}
|
|
}
|
|
CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
|
|
level++;
|
|
}
|
|
}
|
|
// Return result.
|
|
if (pmutated) *pmutated = mutated;
|
|
if (proot) *proot = h;
|
|
}
|
|
|
|
/**
 * Compute the merkle root of a list of leaf hashes.
 *
 * @param leaves   Leaf hashes, in tree order.
 * @param mutated  Optional out-parameter, forwarded to MerkleComputation as
 *                 its mutation flag; may be NULL.
 * @return The root written by MerkleComputation.
 */
uint256 ComputeMerkleRoot(const std::vector<uint256>& leaves, bool* mutated)
{
    // No branch is requested, so the branch position is just a placeholder:
    // -1 converted to uint32_t, with a NULL branch output.
    const uint32_t unusedPosition = static_cast<uint32_t>(-1);

    uint256 root;
    MerkleComputation(leaves, &root, mutated, unusedPosition, NULL);
    return root;
}
|
|
|
|
std::vector<uint256> ComputeMerkleBranch(const std::vector<uint256>& leaves, uint32_t position) {
|
|
std::vector<uint256> ret;
|
|
MerkleComputation(leaves, NULL, NULL, position, &ret);
|
|
return ret;
|
|
}
|
|
|
|
/**
 * Recompute a merkle root from a leaf hash and its merkle branch.
 *
 * @param leaf           Hash of the leaf the branch belongs to.
 * @param vMerkleBranch  Sibling hashes, lowest level first.
 * @param nIndex         Position of the leaf. Bit k (counting from the least
 *                       significant bit) selects the hashing order at level
 *                       k: when set, the sibling is hashed first; when clear,
 *                       the running hash is hashed first.
 * @return The hash obtained after folding in every branch entry; equal to
 *         leaf when the branch is empty.
 */
uint256 ComputeMerkleRootFromBranch(const uint256& leaf, const std::vector<uint256>& vMerkleBranch, uint32_t nIndex)
{
    uint256 node = leaf;
    for (size_t depth = 0; depth < vMerkleBranch.size(); ++depth) {
        const uint256& sibling = vMerkleBranch[depth];
        if ((nIndex & 1) == 0) {
            // Running hash is the left operand, sibling the right.
            node = Hash(BEGIN(node), END(node), BEGIN(sibling), END(sibling));
        } else {
            // Sibling is the left operand, running hash the right.
            node = Hash(BEGIN(sibling), END(sibling), BEGIN(node), END(node));
        }
        // Move on to the bit that describes the next level up.
        nIndex >>= 1;
    }
    return node;
}
|
|
|
|
/**
 * Compute the merkle root of a block's transactions.
 *
 * The leaves are the GetHash() values of block.vtx, in order.
 *
 * @param block    Block whose transactions are hashed.
 * @param mutated  Optional out-parameter, forwarded to ComputeMerkleRoot.
 * @return The value returned by ComputeMerkleRoot for those leaves.
 */
uint256 BlockMerkleRoot(const CBlock& block, bool* mutated)
{
    const size_t txCount = block.vtx.size();

    std::vector<uint256> txHashes;
    txHashes.reserve(txCount);
    for (size_t i = 0; i != txCount; ++i) {
        txHashes.push_back(block.vtx[i].GetHash());
    }
    return ComputeMerkleRoot(txHashes, mutated);
}
|
|
|
|
std::vector<uint256> BlockMerkleBranch(const CBlock& block, uint32_t position)
|
|
{
|
|
std::vector<uint256> leaves;
|
|
leaves.resize(block.vtx.size());
|
|
for (size_t s = 0; s < block.vtx.size(); s++) {
|
|
leaves[s] = block.vtx[s].GetHash();
|
|
}
|
|
return ComputeMerkleBranch(leaves, position);
|
|
}
|