merge bitcoin#24858: incorrect blk file size calculation during reindex results in recoverable blk file corruption

This commit is contained in:
Kittywhiskers Van Gogh 2024-10-13 16:36:33 +00:00
parent 9e75b99c53
commit b04b71a957
No known key found for this signature in database
GPG Key ID: 30CD0C065E5C4AAD
5 changed files with 65 additions and 5 deletions

View File

@ -89,6 +89,7 @@ BITCOIN_TESTS =\
test/blockencodings_tests.cpp \
test/blockfilter_tests.cpp \
test/blockfilter_index_tests.cpp \
test/blockmanager_tests.cpp \
test/bloom_tests.cpp \
test/bls_tests.cpp \
test/bswap_tests.cpp \

View File

@ -790,19 +790,24 @@ bool ReadBlockFromDisk(CBlock& block, const CBlockIndex* pindex, const Consensus
return true;
}
/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp)
{
unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
FlatFilePos blockPos;
if (dbp != nullptr) {
const auto position_known {dbp != nullptr};
if (position_known) {
blockPos = *dbp;
} else {
// when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for
// the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes = BLOCK_SERIALIZATION_HEADER_SIZE).
// we add BLOCK_SERIALIZATION_HEADER_SIZE only for new blocks since they will have the serialization header added when written to disk.
nBlockSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
}
if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) {
if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) {
error("%s: FindBlockPos failed", __func__);
return FlatFilePos();
}
if (dbp == nullptr) {
if (!position_known) {
if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) {
AbortNode("Failed to write block");
return FlatFilePos();

View File

@ -47,6 +47,9 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** The maximum size of a blk?????.dat file (since 0.8) */
static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB
/** Size of header written by WriteBlockToDisk before a serialized CBlock */
static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE = CMessageHeader::MESSAGE_START_SIZE + sizeof(unsigned int);
extern std::atomic_bool fImporting;
extern std::atomic_bool fReindex;
/** Pruning-related variables and constants */
@ -185,6 +188,7 @@ public:
bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex* pindex, const CChainParams& chainparams)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
/** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp);
/** Calculate the amount of disk space the block & undo files currently use */

View File

@ -0,0 +1,39 @@
// Copyright (c) 2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <chainparams.h>
#include <node/blockstorage.h>
#include <node/context.h>
#include <validation.h>
#include <boost/test/unit_test.hpp>
#include <test/util/setup_common.h>
// use BasicTestingSetup here for the data directory configuration, setup, and cleanup
BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup)
BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
{
const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)};
BlockManager blockman {};
CChain chain {};
// simulate adding a genesis block normally
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// simulate what happens during reindex
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
// the block is found at offset 8 because there is an 8 byte serialization header
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// now simulate what happens after reindex for the first new block processed
// the actual block contents don't matter, just that it's a block.
// verify that the write position is at offset 0x12d.
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr)};
BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(params->GenesisBlock(), CLIENT_VERSION) + BLOCK_SERIALIZATION_HEADER_SIZE);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -4705,7 +4705,18 @@ void CChainState::LoadExternalBlockFile(
}
}
} catch (const std::exception& e) {
LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what());
// historical bugs added extra data to the block files that does not deserialize cleanly.
// commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process.
// the code that reads the block files deals with invalid data by simply ignoring it.
// it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly
// and passes all of the other block validation checks dealing with POW and the merkle root, etc...
// we merely note with this informational log message when unexpected data is encountered.
// we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but
// less likely scenarios. we don't have enough information to tell a difference here.
// the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator
// may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and
// perhaps ordered, block files for later reindexing.
LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what());
}
}
} catch (const std::runtime_error& e) {