dash/src/streams.h

813 lines
24 KiB
C
Raw Normal View History

// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2019 The Bitcoin Core developers
2014-12-13 05:09:33 +01:00
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_STREAMS_H
#define BITCOIN_STREAMS_H
Backport 11651 (#3358) * scripted-diff: Replace #include "" with #include <> (ryanofsky) -BEGIN VERIFY SCRIPT- for f in \ src/*.cpp \ src/*.h \ src/bench/*.cpp \ src/bench/*.h \ src/compat/*.cpp \ src/compat/*.h \ src/consensus/*.cpp \ src/consensus/*.h \ src/crypto/*.cpp \ src/crypto/*.h \ src/crypto/ctaes/*.h \ src/policy/*.cpp \ src/policy/*.h \ src/primitives/*.cpp \ src/primitives/*.h \ src/qt/*.cpp \ src/qt/*.h \ src/qt/test/*.cpp \ src/qt/test/*.h \ src/rpc/*.cpp \ src/rpc/*.h \ src/script/*.cpp \ src/script/*.h \ src/support/*.cpp \ src/support/*.h \ src/support/allocators/*.h \ src/test/*.cpp \ src/test/*.h \ src/wallet/*.cpp \ src/wallet/*.h \ src/wallet/test/*.cpp \ src/wallet/test/*.h \ src/zmq/*.cpp \ src/zmq/*.h do base=${f%/*}/ relbase=${base#src/} sed -i "s:#include \"\(.*\)\"\(.*\):if test -e \$base'\\1'; then echo \"#include <\"\$relbase\"\\1>\\2\"; else echo \"#include <\\1>\\2\"; fi:e" $f done -END VERIFY SCRIPT- Signed-off-by: Pasta <pasta@dashboost.org> * scripted-diff: Replace #include "" with #include <> (Dash Specific) -BEGIN VERIFY SCRIPT- for f in \ src/bls/*.cpp \ src/bls/*.h \ src/evo/*.cpp \ src/evo/*.h \ src/governance/*.cpp \ src/governance/*.h \ src/llmq/*.cpp \ src/llmq/*.h \ src/masternode/*.cpp \ src/masternode/*.h \ src/privatesend/*.cpp \ src/privatesend/*.h do base=${f%/*}/ relbase=${base#src/} sed -i "s:#include \"\(.*\)\"\(.*\):if test -e \$base'\\1'; then echo \"#include <\"\$relbase\"\\1>\\2\"; else echo \"#include <\\1>\\2\"; fi:e" $f done -END VERIFY SCRIPT- Signed-off-by: Pasta <pasta@dashboost.org> * build: Remove -I for everything but project root Remove -I from build system for everything but the project root, and built-in dependencies. Signed-off-by: Pasta <pasta@dashboost.org> # Conflicts: # src/Makefile.test.include * qt: refactor: Use absolute include paths in .ui files * qt: refactor: Changes to make include paths absolute This makes all include paths in the GUI absolute. Many changes are involved as every single source file in src/qt/ assumes to be able to use relative includes. Signed-off-by: Pasta <pasta@dashboost.org> # Conflicts: # src/qt/dash.cpp # src/qt/optionsmodel.cpp # src/qt/test/rpcnestedtests.cpp * test: refactor: Use absolute include paths for test data files * Recommend #include<> syntax in developer notes * refactor: Include obj/build.h instead of build.h * END BACKPORT #11651 Remove trailing whitespace causing travis failure * fix backport 11651 Signed-off-by: Pasta <pasta@dashboost.org> * More of 11651 * fix blockchain.cpp Signed-off-by: pasta <pasta@dashboost.org> * Add missing "qt/" in includes * Add missing "test/" in includes * Fix trailing whitespaces Co-authored-by: Wladimir J. van der Laan <laanwj@gmail.com> Co-authored-by: Russell Yanofsky <russ@yanofsky.org> Co-authored-by: MeshCollider <dobsonsa68@gmail.com> Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>
2020-03-19 23:46:56 +01:00
#include <serialize.h>
#include <span.h>
#include <support/allocators/zeroafterfree.h>
#include <algorithm>
#include <assert.h>
#include <ios>
#include <limits>
#include <optional>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <utility>
#include <vector>
template<typename Stream>
class OverrideStream
{
Stream* stream;
const int nType;
const int nVersion;
public:
OverrideStream(Stream* stream_, int nType_, int nVersion_) : stream(stream_), nType(nType_), nVersion(nVersion_) {}
template<typename T>
OverrideStream<Stream>& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
template<typename T>
OverrideStream<Stream>& operator>>(T&& obj)
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
void write(const char* pch, size_t nSize)
{
stream->write(pch, nSize);
}
void read(char* pch, size_t nSize)
{
stream->read(pch, nSize);
}
int GetVersion() const { return nVersion; }
int GetType() const { return nType; }
size_t size() const { return stream->size(); }
void ignore(size_t size) { return stream->ignore(size); }
};
/* Minimal stream for overwriting and/or appending to an existing byte vector
*
* The referenced vector will grow as necessary
*/
class CVectorWriter
{
public:
/*
* @param[in] nTypeIn Serialization Type
* @param[in] nVersionIn Serialization Version (including any flags)
* @param[in] vchDataIn Referenced byte vector to overwrite/append
* @param[in] nPosIn Starting position. Vector index where writes should start. The vector will initially
* grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size().
*/
CVectorWriter(int nTypeIn, int nVersionIn, std::vector<unsigned char>& vchDataIn, size_t nPosIn) : nType(nTypeIn), nVersion(nVersionIn), vchData(vchDataIn), nPos(nPosIn)
{
if(nPos > vchData.size())
vchData.resize(nPos);
}
/*
* (other params same as above)
* @param[in] args A list of items to serialize starting at nPosIn.
*/
template <typename... Args>
CVectorWriter(int nTypeIn, int nVersionIn, std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : CVectorWriter(nTypeIn, nVersionIn, vchDataIn, nPosIn)
{
::SerializeMany(*this, std::forward<Args>(args)...);
}
void write(const char* pch, size_t nSize)
{
assert(nPos <= vchData.size());
size_t nOverwrite = std::min(nSize, vchData.size() - nPos);
if (nOverwrite) {
memcpy(vchData.data() + nPos, reinterpret_cast<const unsigned char*>(pch), nOverwrite);
}
if (nOverwrite < nSize) {
vchData.insert(vchData.end(), reinterpret_cast<const unsigned char*>(pch) + nOverwrite, reinterpret_cast<const unsigned char*>(pch) + nSize);
}
nPos += nSize;
}
template<typename T>
CVectorWriter& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
int GetVersion() const
{
return nVersion;
}
int GetType() const
{
return nType;
}
size_t size() const
{
return vchData.size() - nPos;
}
private:
const int nType;
const int nVersion;
std::vector<unsigned char>& vchData;
size_t nPos;
};
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
/** Minimal stream for reading from an existing vector by reference
*/
class VectorReader
{
private:
const int m_type;
const int m_version;
const std::vector<unsigned char>& m_data;
size_t m_pos = 0;
public:
/**
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
* @param[in] type Serialization Type
* @param[in] version Serialization Version (including any flags)
* @param[in] data Referenced byte vector to overwrite/append
* @param[in] pos Starting position. Vector index where reads should start.
*/
VectorReader(int type, int version, const std::vector<unsigned char>& data, size_t pos)
: m_type(type), m_version(version), m_data(data), m_pos(pos)
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
{
if (m_pos > m_data.size()) {
throw std::ios_base::failure("VectorReader(...): end of data (m_pos > m_data.size())");
}
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
}
/**
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
* (other params same as above)
* @param[in] args A list of items to deserialize starting at pos.
*/
template <typename... Args>
VectorReader(int type, int version, const std::vector<unsigned char>& data, size_t pos,
Args&&... args)
: VectorReader(type, version, data, pos)
{
::UnserializeMany(*this, std::forward<Args>(args)...);
}
template<typename T>
VectorReader& operator>>(T&& obj)
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
int GetVersion() const { return m_version; }
int GetType() const { return m_type; }
size_t size() const { return m_data.size() - m_pos; }
bool empty() const { return m_data.size() == m_pos; }
void read(char* dst, size_t n)
{
if (n == 0) {
return;
}
// Read from the beginning of the buffer
size_t pos_next = m_pos + n;
if (pos_next > m_data.size()) {
throw std::ios_base::failure("VectorReader::read(): end of data");
}
memcpy(dst, m_data.data() + m_pos, n);
m_pos = pos_next;
}
};
/** Double ended buffer combining vector and stream-like interfaces.
*
* >> and << read and write unformatted data using the above serialization templates.
* Fills with data in linear time; some stringstream implementations take N^2 time.
*/
class CDataStream
{
protected:
using vector_type = SerializeData;
vector_type vch;
unsigned int nReadPos{0};
int nType;
int nVersion;
public:
typedef vector_type::allocator_type allocator_type;
typedef vector_type::size_type size_type;
typedef vector_type::difference_type difference_type;
typedef vector_type::reference reference;
typedef vector_type::const_reference const_reference;
typedef vector_type::value_type value_type;
typedef vector_type::iterator iterator;
typedef vector_type::const_iterator const_iterator;
typedef vector_type::reverse_iterator reverse_iterator;
explicit CDataStream(int nTypeIn, int nVersionIn)
: nType{nTypeIn},
nVersion{nVersionIn} {}
explicit CDataStream(Span<const value_type> sp, int nTypeIn, int nVersionIn)
: vch(sp.data(), sp.data() + sp.size()),
nType{nTypeIn},
nVersion{nVersionIn} {}
Backport Bitcoin PR#8708: net: have CConnman handle message sending (#1553) * serialization: teach serializers variadics Also add a variadic CDataStream ctor for ease-of-use. * connman is in charge of pushing messages The changes here are dense and subtle, but hopefully all is more explicit than before. - CConnman is now in charge of sending data rather than the nodes themselves. This is necessary because many decisions need to be made with all nodes in mind, and a model that requires the nodes calling up to their manager quickly turns to spaghetti. - The per-node-serializer (ssSend) has been replaced with a (quasi-)const send-version. Since the send version for serialization can only change once per connection, we now explicitly tag messages with INIT_PROTO_VERSION if they are sent before the handshake. With this done, there's no need to lock for access to nSendVersion. Also, a new stream is used for each message, so there's no need to lock during the serialization process. - This takes care of accounting for optimistic sends, so the nOptimisticBytesWritten hack can be removed. - -dropmessagestest and -fuzzmessagestest have not been preserved, as I suspect they haven't been used in years. * net: switch all callers to connman for pushing messages Drop all of the old stuff. * drop the optimistic write counter hack This is now handled properly in realtime. * net: remove now-unused ssSend and Fuzz * net: construct CNodeStates in place * net: handle version push in InitializeNode
2017-07-27 16:28:05 +02:00
template <typename... Args>
CDataStream(int nTypeIn, int nVersionIn, Args&&... args)
: nType{nTypeIn},
nVersion{nVersionIn}
Backport Bitcoin PR#8708: net: have CConnman handle message sending (#1553) * serialization: teach serializers variadics Also add a variadic CDataStream ctor for ease-of-use. * connman is in charge of pushing messages The changes here are dense and subtle, but hopefully all is more explicit than before. - CConnman is now in charge of sending data rather than the nodes themselves. This is necessary because many decisions need to be made with all nodes in mind, and a model that requires the nodes calling up to their manager quickly turns to spaghetti. - The per-node-serializer (ssSend) has been replaced with a (quasi-)const send-version. Since the send version for serialization can only change once per connection, we now explicitly tag messages with INIT_PROTO_VERSION if they are sent before the handshake. With this done, there's no need to lock for access to nSendVersion. Also, a new stream is used for each message, so there's no need to lock during the serialization process. - This takes care of accounting for optimistic sends, so the nOptimisticBytesWritten hack can be removed. - -dropmessagestest and -fuzzmessagestest have not been preserved, as I suspect they haven't been used in years. * net: switch all callers to connman for pushing messages Drop all of the old stuff. * drop the optimistic write counter hack This is now handled properly in realtime. * net: remove now-unused ssSend and Fuzz * net: construct CNodeStates in place * net: handle version push in InitializeNode
2017-07-27 16:28:05 +02:00
{
::SerializeMany(*this, std::forward<Args>(args)...);
Backport Bitcoin PR#8708: net: have CConnman handle message sending (#1553) * serialization: teach serializers variadics Also add a variadic CDataStream ctor for ease-of-use. * connman is in charge of pushing messages The changes here are dense and subtle, but hopefully all is more explicit than before. - CConnman is now in charge of sending data rather than the nodes themselves. This is necessary because many decisions need to be made with all nodes in mind, and a model that requires the nodes calling up to their manager quickly turns to spaghetti. - The per-node-serializer (ssSend) has been replaced with a (quasi-)const send-version. Since the send version for serialization can only change once per connection, we now explicitly tag messages with INIT_PROTO_VERSION if they are sent before the handshake. With this done, there's no need to lock for access to nSendVersion. Also, a new stream is used for each message, so there's no need to lock during the serialization process. - This takes care of accounting for optimistic sends, so the nOptimisticBytesWritten hack can be removed. - -dropmessagestest and -fuzzmessagestest have not been preserved, as I suspect they haven't been used in years. * net: switch all callers to connman for pushing messages Drop all of the old stuff. * drop the optimistic write counter hack This is now handled properly in realtime. * net: remove now-unused ssSend and Fuzz * net: construct CNodeStates in place * net: handle version push in InitializeNode
2017-07-27 16:28:05 +02:00
}
std::string str() const
{
return (std::string(begin(), end()));
}
//
// Vector subset
//
const_iterator begin() const { return vch.begin() + nReadPos; }
iterator begin() { return vch.begin() + nReadPos; }
const_iterator end() const { return vch.end(); }
iterator end() { return vch.end(); }
size_type size() const { return vch.size() - nReadPos; }
bool empty() const { return vch.size() == nReadPos; }
void resize(size_type n, value_type c = value_type{}) { vch.resize(n + nReadPos, c); }
void reserve(size_type n) { vch.reserve(n + nReadPos); }
const_reference operator[](size_type pos) const { return vch[pos + nReadPos]; }
reference operator[](size_type pos) { return vch[pos + nReadPos]; }
void clear() { vch.clear(); nReadPos = 0; }
iterator insert(iterator it, const value_type x) { return vch.insert(it, x); }
void insert(iterator it, size_type n, const value_type x) { vch.insert(it, n, x); }
value_type* data() { return vch.data() + nReadPos; }
const value_type* data() const { return vch.data() + nReadPos; }
void insert(iterator it, std::vector<value_type>::const_iterator first, std::vector<value_type>::const_iterator last)
{
if (last == first) return;
assert(last - first > 0);
if (it == vch.begin() + nReadPos && (unsigned int)(last - first) <= nReadPos)
{
// special case for inserting at the front when there's room
nReadPos -= (last - first);
memcpy(&vch[nReadPos], &first[0], last - first);
}
else
vch.insert(it, first, last);
}
void insert(iterator it, const value_type* first, const value_type* last)
{
if (last == first) return;
assert(last - first > 0);
if (it == vch.begin() + nReadPos && (unsigned int)(last - first) <= nReadPos)
{
// special case for inserting at the front when there's room
nReadPos -= (last - first);
memcpy(&vch[nReadPos], &first[0], last - first);
}
else
vch.insert(it, first, last);
}
iterator erase(iterator it)
{
if (it == vch.begin() + nReadPos)
{
// special case for erasing from the front
if (++nReadPos >= vch.size())
{
// whenever we reach the end, we take the opportunity to clear the buffer
nReadPos = 0;
return vch.erase(vch.begin(), vch.end());
}
return vch.begin() + nReadPos;
}
else
return vch.erase(it);
}
iterator erase(iterator first, iterator last)
{
if (first == vch.begin() + nReadPos)
{
// special case for erasing from the front
if (last == vch.end())
{
nReadPos = 0;
return vch.erase(vch.begin(), vch.end());
}
else
{
nReadPos = (last - vch.begin());
return last;
}
}
else
return vch.erase(first, last);
}
inline void Compact()
{
vch.erase(vch.begin(), vch.begin() + nReadPos);
nReadPos = 0;
}
bool Rewind(std::optional<size_type> n = std::nullopt)
{
// Total rewind if no size is passed
if (!n) {
nReadPos = 0;
return true;
}
// Rewind by n characters if the buffer hasn't been compacted yet
if (*n > nReadPos)
return false;
nReadPos -= *n;
return true;
}
//
// Stream subset
//
bool eof() const { return size() == 0; }
CDataStream* rdbuf() { return this; }
int in_avail() const { return size(); }
void SetType(int n) { nType = n; }
int GetType() const { return nType; }
void SetVersion(int n) { nVersion = n; }
int GetVersion() const { return nVersion; }
void read(char* pch, size_t nSize)
{
if (nSize == 0) return;
// Read from the beginning of the buffer
unsigned int nReadPosNext = nReadPos + nSize;
if (nReadPosNext > vch.size()) {
throw std::ios_base::failure("CDataStream::read(): end of data");
}
memcpy(pch, &vch[nReadPos], nSize);
if (nReadPosNext == vch.size())
{
nReadPos = 0;
vch.clear();
return;
}
nReadPos = nReadPosNext;
}
void ignore(int nSize)
{
// Ignore from the beginning of the buffer
if (nSize < 0) {
throw std::ios_base::failure("CDataStream::ignore(): nSize negative");
}
unsigned int nReadPosNext = nReadPos + nSize;
if (nReadPosNext >= vch.size())
{
if (nReadPosNext > vch.size())
throw std::ios_base::failure("CDataStream::ignore(): end of data");
nReadPos = 0;
vch.clear();
return;
}
nReadPos = nReadPosNext;
}
void write(const char* pch, size_t nSize)
{
// Write to the end of the buffer
vch.insert(vch.end(), pch, pch + nSize);
}
template<typename Stream>
void Serialize(Stream& s) const
{
// Special case: stream << stream concatenates like stream += stream
if (!vch.empty())
s.write((char*)vch.data(), vch.size() * sizeof(value_type));
}
template<typename T>
CDataStream& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
template<typename T>
CDataStream& operator>>(T&& obj)
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
/**
* XOR the contents of this stream with a certain key.
*
* @param[in] key The key used to XOR the data in this stream.
*/
void Xor(const std::vector<unsigned char>& key)
{
if (key.size() == 0) {
return;
}
for (size_type i = 0, j = 0; i != size(); i++) {
vch[i] ^= key[j++];
// This potentially acts on very many bytes of data, so it's
// important that we calculate `j`, i.e. the `key` index in this
// way instead of doing a %, which would effectively be a division
// for each byte Xor'd -- much slower than need be.
if (j == key.size())
j = 0;
}
}
};
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
template <typename IStream>
class BitStreamReader
{
private:
IStream& m_istream;
/// Buffered byte read in from the input stream. A new byte is read into the
/// buffer when m_offset reaches 8.
uint8_t m_buffer{0};
/// Number of high order bits in m_buffer already returned by previous
/// Read() calls. The next bit to be returned is at this offset from the
/// most significant bit position.
int m_offset{8};
public:
explicit BitStreamReader(IStream& istream) : m_istream(istream) {}
/** Read the specified number of bits from the stream. The data is returned
* in the nbits least significant bits of a 64-bit uint.
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
*/
uint64_t Read(int nbits) {
if (nbits < 0 || nbits > 64) {
throw std::out_of_range("nbits must be between 0 and 64");
}
uint64_t data = 0;
while (nbits > 0) {
if (m_offset == 8) {
m_istream >> m_buffer;
m_offset = 0;
}
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
int bits = std::min(8 - m_offset, nbits);
data <<= bits;
data |= static_cast<uint8_t>(m_buffer << m_offset) >> (8 - bits);
m_offset += bits;
nbits -= bits;
}
return data;
}
};
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
template <typename OStream>
class BitStreamWriter
{
private:
OStream& m_ostream;
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
/// Buffered byte waiting to be written to the output stream. The byte is
/// written buffer when m_offset reaches 8 or Flush() is called.
uint8_t m_buffer{0};
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
/// Number of high order bits in m_buffer already written by previous
/// Write() calls and not yet flushed to the stream. The next bit to be
/// written to is at this offset from the most significant bit position.
int m_offset{0};
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
public:
explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {}
Merge #12254: BIP 158: Compact Block Filters for Light Clients 254c85b68794ada713dbdae415db72adf5fcbaf3 bench: Benchmark GCS filter creation and matching. (Jim Posen) f33b717a85363e067316c133a542559d2f4aaeca blockfilter: Optimization on compilers with int128 support. (Jim Posen) 97b64d67daf0336dfb64b132f3e4d6a4c1967da4 blockfilter: Unit test against BIP 158 test vectors. (Jim Posen) a4afb9cadbaecb0676e6475ab8d32a52faecb47a blockfilter: Additional helper methods to compute hash and header. (Jim Posen) cd09c7925b5af4104834971cfe072251e3ac2bda blockfilter: Serialization methods on BlockFilter. (Jim Posen) c1855f6052aca806fdb51be01b30dfeee8b55f40 blockfilter: Construction of basic block filters. (Jim Posen) 53e7874e079f9ddfe8b176f11d46e6b59c7283d5 blockfilter: Simple test for GCSFilter construction and Match. (Jim Posen) 558c536e35a25594881693e6ff01d275c88d7af1 blockfilter: Implement GCSFilter Match methods. (Jim Posen) cf70b550054eed36f194eaa13f4a9cb31e32df38 blockfilter: Implement GCSFilter constructors. (Jim Posen) c454f0ac63c6028f54c7eb51683b3ccdb475b19b blockfilter: Declare GCSFilter class for BIP 158 impl. (Jim Posen) 9b622dc72279b027c59d6541cddff53800fc689b streams: Unit tests for BitStreamReader and BitStreamWriter. (Jim Posen) fe943f99bf0a2bbb12e30bc4803c0337e3c95b93 streams: Implement BitStreamReader/Writer classes. (Jim Posen) 87f2d9ee43a9220076b1959d1ca65245d9591be9 streams: Unit test for VectorReader class. (Jim Posen) 947133dec92cd25ec2b3358c09b8614ba6fb40d4 streams: Create VectorReader stream interface for vectors. (Jim Posen) Pull request description: This implements the compact block filter construction in [BIP 158](https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki). The code is not used anywhere in the Bitcoin Core code base yet. The next step towards [BIP 157](https://github.com/bitcoin/bips/blob/master/bip-0157.mediawiki) support would be to create an indexing module similar to `TxIndex` that constructs the basic and extended filters for each validated block. ### Filter Sizes [Here](https://gateway.ipfs.io/ipfs/QmRqaAAQZ5ZX5eqxP7J2R1MzFrc2WDdKSWJEKtQzyawqog) is a CSV of filter sizes for blocks in the main chain. As you can see below, the ratio of filter size to block size drops after the first ~150,000 blocks: ![filter_sizes](https://user-images.githubusercontent.com/881253/42900589-299772d4-8a7e-11e8-886d-0d4f3f4fbe44.png) The reason for the relatively large filter sizes is that Golomb-coded sets only achieve good compression with a sufficient number of elements. Empirically, the average element size with 100 elements is 14% larger than with 10,000 elements. The ratio of filter size to block size is computed without witness data for basic filters. Here is a summary table of filter size ratios *for blocks after height 150,000*: | Stat | Filter Type | |-------|--------------| | Weighted Size Ratio Mean | 0.0198 | | Size Ratio Mean | 0.0224 | | Size Ratio Std Deviation | 0.0202 | | Mean Element Size (bits) | 21.145 | | Approx Theoretical Min Element Size (bits) | 21.025 | Tree-SHA512: 2d045fbfc3fc45490ecb9b08d2f7e4dbbe7cd8c1c939f06bbdb8e8aacfe4c495cdb67c820e52520baebbf8a8305a0efd8e59d3fa8e367574a4b830509a39223f
2018-08-26 16:57:01 +02:00
~BitStreamWriter()
{
Flush();
}
/** Write the nbits least significant bits of a 64-bit int to the output
* stream. Data is buffered until it completes an octet.
*/
void Write(uint64_t data, int nbits) {
if (nbits < 0 || nbits > 64) {
throw std::out_of_range("nbits must be between 0 and 64");
}
while (nbits > 0) {
int bits = std::min(8 - m_offset, nbits);
m_buffer |= (data << (64 - nbits)) >> (64 - 8 + m_offset);
m_offset += bits;
nbits -= bits;
if (m_offset == 8) {
Flush();
}
}
}
/** Flush any unwritten bits to the output stream, padding with 0's to the
* next byte boundary.
*/
void Flush() {
if (m_offset == 0) {
return;
}
m_ostream << m_buffer;
m_buffer = 0;
m_offset = 0;
}
};
/** Non-refcounted RAII wrapper for FILE*
*
* Will automatically close the file when it goes out of scope if not null.
* If you're returning the file pointer, return file.release().
* If you need to close the file early, use file.fclose() instead of fclose(file).
*/
class CAutoFile
{
private:
const int nType;
const int nVersion;
FILE* file;
public:
CAutoFile(FILE* filenew, int nTypeIn, int nVersionIn) : nType(nTypeIn), nVersion(nVersionIn)
{
file = filenew;
}
~CAutoFile()
{
fclose();
}
// Disallow copies
CAutoFile(const CAutoFile&) = delete;
CAutoFile& operator=(const CAutoFile&) = delete;
void fclose()
{
if (file) {
::fclose(file);
file = nullptr;
}
}
/** Get wrapped FILE* with transfer of ownership.
* @note This will invalidate the CAutoFile object, and makes it the responsibility of the caller
* of this function to clean up the returned FILE*.
*/
FILE* release() { FILE* ret = file; file = nullptr; return ret; }
/** Get wrapped FILE* without transfer of ownership.
* @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
* CAutoFile outlives use of the passed pointer.
*/
FILE* Get() const { return file; }
/** Return true if the wrapped FILE* is nullptr, false otherwise.
*/
bool IsNull() const { return (file == nullptr); }
//
// Stream subset
//
int GetType() const { return nType; }
int GetVersion() const { return nVersion; }
void read(char* pch, size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::read: file handle is nullptr");
if (fread(pch, 1, nSize, file) != nSize)
throw std::ios_base::failure(feof(file) ? "CAutoFile::read: end of file" : "CAutoFile::read: fread failed");
}
void ignore(size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::ignore: file handle is nullptr");
unsigned char data[4096];
while (nSize > 0) {
size_t nNow = std::min<size_t>(nSize, sizeof(data));
if (fread(data, 1, nNow, file) != nNow)
throw std::ios_base::failure(feof(file) ? "CAutoFile::ignore: end of file" : "CAutoFile::read: fread failed");
nSize -= nNow;
}
}
void write(const char* pch, size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::write: file handle is nullptr");
if (fwrite(pch, 1, nSize, file) != nSize)
throw std::ios_base::failure("CAutoFile::write: write failed");
}
template<typename T>
CAutoFile& operator<<(const T& obj)
{
// Serialize to this stream
if (!file)
throw std::ios_base::failure("CAutoFile::operator<<: file handle is nullptr");
::Serialize(*this, obj);
return (*this);
}
template<typename T>
CAutoFile& operator>>(T&& obj)
{
// Unserialize from this stream
if (!file)
throw std::ios_base::failure("CAutoFile::operator>>: file handle is nullptr");
::Unserialize(*this, obj);
return (*this);
}
};
/** Non-refcounted RAII wrapper around a FILE* that implements a ring buffer to
* deserialize from. It guarantees the ability to rewind a given number of bytes.
*
* Will automatically close the file when it goes out of scope if not null.
* If you need to close the file early, use file.fclose() instead of fclose(file).
*/
class CBufferedFile
{
private:
const int nType;
const int nVersion;
FILE *src; //!< source file
uint64_t nSrcPos; //!< how many bytes have been read from source
uint64_t nReadPos; //!< how many bytes have been read from this
uint64_t nReadLimit; //!< up to which position we're allowed to read
uint64_t nRewind; //!< how many bytes we guarantee to rewind
std::vector<char> vchBuf; //!< the buffer
protected:
//! read data from the source to fill the buffer
bool Fill() {
unsigned int pos = nSrcPos % vchBuf.size();
unsigned int readNow = vchBuf.size() - pos;
unsigned int nAvail = vchBuf.size() - (nSrcPos - nReadPos) - nRewind;
if (nAvail < readNow)
readNow = nAvail;
if (readNow == 0)
return false;
size_t nBytes = fread((void*)&vchBuf[pos], 1, readNow, src);
if (nBytes == 0) {
throw std::ios_base::failure(feof(src) ? "CBufferedFile::Fill: end of file" : "CBufferedFile::Fill: fread failed");
}
Merge #16577: util: CBufferedFile fixes and unit test efd2474d17098c754367b844ec646ebececc7c74 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474d17098c754367b844ec646ebececc7c74 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474d17098c754367b844ec646ebececc7c74. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2019-09-26 13:37:06 +02:00
nSrcPos += nBytes;
return true;
}
public:
CBufferedFile(FILE *fileIn, uint64_t nBufSize, uint64_t nRewindIn, int nTypeIn, int nVersionIn) :
nType(nTypeIn), nVersion(nVersionIn), nSrcPos(0), nReadPos(0), nReadLimit(std::numeric_limits<uint64_t>::max()), nRewind(nRewindIn), vchBuf(nBufSize, 0)
{
Merge #16577: util: CBufferedFile fixes and unit test efd2474d17098c754367b844ec646ebececc7c74 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474d17098c754367b844ec646ebececc7c74 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474d17098c754367b844ec646ebececc7c74. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2019-09-26 13:37:06 +02:00
if (nRewindIn >= nBufSize)
throw std::ios_base::failure("Rewind limit must be less than buffer size");
src = fileIn;
}
~CBufferedFile()
{
fclose();
}
// Disallow copies
CBufferedFile(const CBufferedFile&) = delete;
CBufferedFile& operator=(const CBufferedFile&) = delete;
int GetVersion() const { return nVersion; }
int GetType() const { return nType; }
void fclose()
{
if (src) {
::fclose(src);
src = nullptr;
}
}
//! check whether we're at the end of the source file
bool eof() const {
return nReadPos == nSrcPos && feof(src);
}
//! read a number of bytes
void read(char *pch, size_t nSize) {
if (nSize + nReadPos > nReadLimit)
throw std::ios_base::failure("Read attempted past buffer limit");
while (nSize > 0) {
if (nReadPos == nSrcPos)
Fill();
unsigned int pos = nReadPos % vchBuf.size();
size_t nNow = nSize;
if (nNow + pos > vchBuf.size())
nNow = vchBuf.size() - pos;
if (nNow + nReadPos > nSrcPos)
nNow = nSrcPos - nReadPos;
memcpy(pch, &vchBuf[pos], nNow);
nReadPos += nNow;
pch += nNow;
nSize -= nNow;
}
}
//! return the current reading position
uint64_t GetPos() const {
return nReadPos;
}
//! rewind to a given reading position
bool SetPos(uint64_t nPos) {
Merge #16577: util: CBufferedFile fixes and unit test efd2474d17098c754367b844ec646ebececc7c74 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474d17098c754367b844ec646ebececc7c74 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474d17098c754367b844ec646ebececc7c74. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2019-09-26 13:37:06 +02:00
size_t bufsize = vchBuf.size();
if (nPos + bufsize < nSrcPos) {
// rewinding too far, rewind as far as possible
nReadPos = nSrcPos - bufsize;
return false;
Merge #16577: util: CBufferedFile fixes and unit test efd2474d17098c754367b844ec646ebececc7c74 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474d17098c754367b844ec646ebececc7c74 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474d17098c754367b844ec646ebececc7c74. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2019-09-26 13:37:06 +02:00
}
if (nPos > nSrcPos) {
// can't go this far forward, go as far as possible
nReadPos = nSrcPos;
return false;
}
Merge #16577: util: CBufferedFile fixes and unit test efd2474d17098c754367b844ec646ebececc7c74 util: CBufferedFile fixes (Larry Ruane) Pull request description: The `CBufferedFile` object guarantees its user is able to "rewind" the data stream (that's being read from a file) up to a certain number of bytes, as specified by the user in the constructor. This guarantee is not honored due to a bug in the `SetPos` method. Such rewinding is done in `LoadExternalBlockFile()` (currently the only user of this object), which deserializes a series of `CBlock` objects. If that function encounters something unexpected in the data stream, which is coming from a `blocks/blk00???.dat` file, it "rewinds" to an earlier position in the stream to try to get in sync again. The `CBufferedFile` object does not actually rewind its file offset; it simply repositions its internal offset, `nReadPos`, to an earlier position within the object's private buffer; this is why there's a limit to how far the user may rewind. If `LoadExternalBlockFile()` needs to rewind (call `blkdat.SetPos()`), the stream may not be positioned as it should be, causing errors in deserialization. This need to rewind is probably rare, which is likely why this bug hasn't been noticed already. But if this object is used elsewhere in the future, this could be a serious problem, especially as, due to the nature of the bug, the `SetPos()` _sometimes_ works. This PR adds a unit test for `CBufferedFile` that fails due to this bug. (Until now it has had no unit tests.) The unit test provides good documentation and examples for developers trying to understand `LoadExternalBlockFile()` and for future users of this object. This PR also adds code to throw an exception from the constructor if the rewind argument is not less than the buffer size (since that doesn't make any sense). Finally, I discovered that the object is too restrictive in one respect: When the deserialization methods call this object's `read` method, a check ensures that the number of bytes being requested is less than the size of the buffer (adjusting for the rewind size), else it throws an exception. This restriction is unnecessary; the object being deserialized can be larger than the buffer because multiple reads from disk can satisfy the request. ACKs for top commit: laanwj: ACK ~after squash.~ efd2474d17098c754367b844ec646ebececc7c74 mzumsande: I had intended to follow up earlier on my last comment, ACK efd2474d17098c754367b844ec646ebececc7c74. I reviewed the code, ran tests and did a successful reindex on testnet with this branch. Tree-SHA512: 695529e0af38bae2af4e0cc2895dda56a71b9059c3de04d32e09c0165a50f6aacee499f2042156ab5eaa6f0349bab6bcca4ef9f6f9ded4e60d4483beab7e4554
2019-09-26 13:37:06 +02:00
nReadPos = nPos;
return true;
}
//! prevent reading beyond a certain position
//! no argument removes the limit
bool SetLimit(uint64_t nPos = std::numeric_limits<uint64_t>::max()) {
if (nPos < nReadPos)
return false;
nReadLimit = nPos;
return true;
}
template<typename T>
CBufferedFile& operator>>(T&& obj) {
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
//! search for a given byte in the stream, and remain positioned on it
void FindByte(char ch) {
while (true) {
if (nReadPos == nSrcPos)
Fill();
if (vchBuf[nReadPos % vchBuf.size()] == ch)
break;
nReadPos++;
}
}
};
#endif // BITCOIN_STREAMS_H