From b04b71a9570912b08ff2e3da9c38e5db737a79e4 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kwvg@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:36:33 +0000 Subject: [PATCH] merge bitcoin#24858: incorrect blk file size calculation during reindex results in recoverable blk file corruption --- src/Makefile.test.include | 1 + src/node/blockstorage.cpp | 13 +++++++---- src/node/blockstorage.h | 4 ++++ src/test/blockmanager_tests.cpp | 39 +++++++++++++++++++++++++++++++++ src/validation.cpp | 13 ++++++++++- 5 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 src/test/blockmanager_tests.cpp diff --git a/src/Makefile.test.include b/src/Makefile.test.include index 839224e9a1..a9b24c0007 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -89,6 +89,7 @@ BITCOIN_TESTS =\ test/blockencodings_tests.cpp \ test/blockfilter_tests.cpp \ test/blockfilter_index_tests.cpp \ + test/blockmanager_tests.cpp \ test/bloom_tests.cpp \ test/bls_tests.cpp \ test/bswap_tests.cpp \ diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 960759396f..2c8cacccef 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -790,19 +790,24 @@ bool ReadBlockFromDisk(CBlock& block, const CBlockIndex* pindex, const Consensus return true; } -/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */ FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp) { unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION); FlatFilePos blockPos; - if (dbp != nullptr) { + const auto position_known {dbp != nullptr}; + if (position_known) { blockPos = *dbp; + } else { + // when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for + // the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes = BLOCK_SERIALIZATION_HEADER_SIZE). + // we add BLOCK_SERIALIZATION_HEADER_SIZE only for new blocks since they will have the serialization header added when written to disk. + nBlockSize += static_cast(BLOCK_SERIALIZATION_HEADER_SIZE); } - if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) { + if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) { error("%s: FindBlockPos failed", __func__); return FlatFilePos(); } - if (dbp == nullptr) { + if (!position_known) { if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) { AbortNode("Failed to write block"); return FlatFilePos(); diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 7f671fd5f8..1b919df908 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -47,6 +47,9 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB /** The maximum size of a blk?????.dat file (since 0.8) */ static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB +/** Size of header written by WriteBlockToDisk before a serialized CBlock */ +static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE = CMessageHeader::MESSAGE_START_SIZE + sizeof(unsigned int); + extern std::atomic_bool fImporting; extern std::atomic_bool fReindex; /** Pruning-related variables and constants */ @@ -185,6 +188,7 @@ public: bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex* pindex, const CChainParams& chainparams) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + /** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */ FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp); /** Calculate the amount of disk space the block & undo files currently use */ diff --git a/src/test/blockmanager_tests.cpp b/src/test/blockmanager_tests.cpp new file mode 100644 index 0000000000..a9ce570a3d --- /dev/null +++ b/src/test/blockmanager_tests.cpp @@ -0,0 +1,39 @@ +// Copyright (c) 2022 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include + +#include +#include + +// use BasicTestingSetup here for the data directory configuration, setup, and cleanup +BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup) + +BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos) +{ + const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)}; + BlockManager blockman {}; + CChain chain {}; + // simulate adding a genesis block normally + BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr).nPos, BLOCK_SERIALIZATION_HEADER_SIZE); + // simulate what happens during reindex + // simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file + // the block is found at offset 8 because there is an 8 byte serialization header + // consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file. + FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE}; + BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos).nPos, BLOCK_SERIALIZATION_HEADER_SIZE); + // now simulate what happens after reindex for the first new block processed + // the actual block contents don't matter, just that it's a block. + // verify that the write position is at offset 0x12d. + // this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur + // 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293 + // add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301 + FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr)}; + BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(params->GenesisBlock(), CLIENT_VERSION) + BLOCK_SERIALIZATION_HEADER_SIZE); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index efe4e2cfe2..844ad7eb6e 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -4705,7 +4705,18 @@ void CChainState::LoadExternalBlockFile( } } } catch (const std::exception& e) { - LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what()); + // historical bugs added extra data to the block files that does not deserialize cleanly. + // commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process. + // the code that reads the block files deals with invalid data by simply ignoring it. + // it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly + // and passes all of the other block validation checks dealing with POW and the merkle root, etc... + // we merely note with this informational log message when unexpected data is encountered. + // we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but + // less likely scenarios. we don't have enough information to tell a difference here. + // the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator + // may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and + // perhaps ordered, block files for later reindexing. + LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what()); } } } catch (const std::runtime_error& e) {