From 3bd584c845e7aae4fbc94a0f1456dca56a3a5629 Mon Sep 17 00:00:00 2001 From: Kittywhiskers Van Gogh <63189531+kwvg@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:16:07 +0000 Subject: [PATCH] merge bitcoin#24832: Verify the block filter hash when reading the filter from disk --- src/bench/gcs_filter.cpp | 80 ++++++++++++++++++++++++++-------- src/blockfilter.cpp | 8 ++-- src/blockfilter.h | 6 +-- src/index/blockfilterindex.cpp | 13 ++++-- src/index/blockfilterindex.h | 2 +- 5 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp index 3dd53bab1b..7f08cbec68 100644 --- a/src/bench/gcs_filter.cpp +++ b/src/bench/gcs_filter.cpp @@ -5,38 +5,84 @@ #include #include -static void ConstructGCSFilter(benchmark::Bench& bench) +static const GCSFilter::ElementSet GenerateGCSTestElements() { GCSFilter::ElementSet elements; - for (int i = 0; i < 10000; ++i) { + + // Testing the benchmarks with different number of elements show that a filter + // with at least 100,000 elements results in benchmarks that have the same + // ns/op. This makes it easy to reason about how long (in nanoseconds) a single + // filter element takes to process. + for (int i = 0; i < 100000; ++i) { GCSFilter::Element element(32); element[0] = static_cast(i); element[1] = static_cast(i >> 8); elements.insert(std::move(element)); } + return elements; +} + +static void GCSBlockFilterGetHash(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + BlockFilter block_filter(BlockFilterType::BASIC_FILTER, {}, filter.GetEncoded(), /*skip_decode_check=*/false); + + bench.run([&] { + block_filter.GetHash(); + }); +} + +static void GCSFilterConstruct(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + uint64_t siphash_k0 = 0; - bench.batch(elements.size()).unit("elem").run([&] { - GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements); + bench.run([&]{ + GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + siphash_k0++; }); } -static void MatchGCSFilter(benchmark::Bench& bench) +static void GCSFilterDecode(benchmark::Bench& bench) { - GCSFilter::ElementSet elements; - for (int i = 0; i < 10000; ++i) { - GCSFilter::Element element(32); - element[0] = static_cast(i); - element[1] = static_cast(i >> 8); - elements.insert(std::move(element)); - } - GCSFilter filter({0, 0, 20, 1 << 20}, elements); + auto elements = GenerateGCSTestElements(); - bench.unit("elem").run([&] { - filter.Match(GCSFilter::Element()); + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + auto encoded = filter.GetEncoded(); + + bench.run([&] { + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false); }); } -BENCHMARK(ConstructGCSFilter); -BENCHMARK(MatchGCSFilter); +static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + auto encoded = filter.GetEncoded(); + + bench.run([&] { + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true); + }); +} + +static void GCSFilterMatch(benchmark::Bench& bench) +{ + auto elements = GenerateGCSTestElements(); + + GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements); + + bench.run([&] { + filter.Match(GCSFilter::Element()); + }); +} +BENCHMARK(GCSBlockFilterGetHash); +BENCHMARK(GCSFilterConstruct); +BENCHMARK(GCSFilterDecode); +BENCHMARK(GCSFilterDecodeSkipCheck); +BENCHMARK(GCSFilterMatch); diff --git a/src/blockfilter.cpp b/src/blockfilter.cpp index 25ca14c002..e7cc29afda 100644 --- a/src/blockfilter.cpp +++ b/src/blockfilter.cpp @@ -47,7 +47,7 @@ GCSFilter::GCSFilter(const Params& params) : m_params(params), m_N(0), m_F(0), m_encoded{0} {} -GCSFilter::GCSFilter(const Params& params, std::vector encoded_filter) +GCSFilter::GCSFilter(const Params& params, std::vector encoded_filter, bool skip_decode_check) : m_params(params), m_encoded(std::move(encoded_filter)) { SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0}; @@ -59,6 +59,8 @@ GCSFilter::GCSFilter(const Params& params, std::vector encoded_fi } m_F = static_cast(m_N) * static_cast(m_params.m_M); + if (skip_decode_check) return; + // Verify that the encoded filter contains exactly N elements. If it has too much or too little // data, a std::ios_base::failure exception will be raised. BitStreamReader bitreader{stream}; @@ -219,14 +221,14 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block, } BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash, - std::vector filter) + std::vector filter, bool skip_decode_check) : m_filter_type(filter_type), m_block_hash(block_hash) { GCSFilter::Params params; if (!BuildParams(params)) { throw std::invalid_argument("unknown filter_type"); } - m_filter = GCSFilter(params, std::move(filter)); + m_filter = GCSFilter(params, std::move(filter), skip_decode_check); } BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo) diff --git a/src/blockfilter.h b/src/blockfilter.h index 8bdd789dd5..081e35ed9f 100644 --- a/src/blockfilter.h +++ b/src/blockfilter.h @@ -60,7 +60,7 @@ public: explicit GCSFilter(const Params& params = Params()); /** Reconstructs an already-created filter from an encoding. */ - GCSFilter(const Params& params, std::vector encoded_filter); + GCSFilter(const Params& params, std::vector encoded_filter, bool skip_decode_check); /** Builds a new filter from the params and set of elements. */ GCSFilter(const Params& params, const ElementSet& elements); @@ -123,7 +123,7 @@ public: //! Reconstruct a BlockFilter from parts. BlockFilter(BlockFilterType filter_type, const uint256& block_hash, - std::vector filter); + std::vector filter, bool skip_decode_check); //! Construct a new BlockFilter of the specified type from a block. BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo); @@ -165,7 +165,7 @@ public: if (!BuildParams(params)) { throw std::ios_base::failure("unknown filter_type"); } - m_filter = GCSFilter(params, std::move(encoded_filter)); + m_filter = GCSFilter(params, std::move(encoded_filter), /*skip_decode_check=*/false); } }; diff --git a/src/index/blockfilterindex.cpp b/src/index/blockfilterindex.cpp index 75a190fdb8..2e84934eba 100644 --- a/src/index/blockfilterindex.cpp +++ b/src/index/blockfilterindex.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -146,18 +147,22 @@ bool BlockFilterIndex::CommitInternal(CDBBatch& batch) return BaseIndex::CommitInternal(batch); } -bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const +bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const { CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION); if (filein.IsNull()) { return false; } + // Check that the hash of the encoded_filter matches the one stored in the db. uint256 block_hash; std::vector encoded_filter; try { filein >> block_hash >> encoded_filter; - filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter)); + uint256 result; + CHash256().Write(encoded_filter).Finalize(result); + if (result != hash) return error("Checksum mismatch in filter decode."); + filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); } catch (const std::exception& e) { return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what()); @@ -384,7 +389,7 @@ bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& return false; } - return ReadFilterFromDisk(entry.pos, filter_out); + return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); } bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) @@ -428,7 +433,7 @@ bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* st filters_out.resize(entries.size()); auto filter_pos_it = filters_out.begin(); for (const auto& entry : entries) { - if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) { + if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { return false; } ++filter_pos_it; diff --git a/src/index/blockfilterindex.h b/src/index/blockfilterindex.h index 06b107d111..38754ead04 100644 --- a/src/index/blockfilterindex.h +++ b/src/index/blockfilterindex.h @@ -32,7 +32,7 @@ private: FlatFilePos m_next_filter_pos; std::unique_ptr m_filter_fileseq; - bool ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const; + bool ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const; size_t WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter); Mutex m_cs_headers_cache;