// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2015 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_STREAMS_H
#define BITCOIN_STREAMS_H
#include <support/allocators/zeroafterfree.h>
#include <serialize.h>
#include <algorithm>
#include <assert.h>
#include <ios>
#include <limits>
#include <map>
#include <set>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <string.h>
#include <utility>
#include <vector>
template<typename Stream>
class OverrideStream
{
Stream* stream;
const int nType;
const int nVersion;
public:
OverrideStream(Stream* stream_, int nType_, int nVersion_) : stream(stream_), nType(nType_), nVersion(nVersion_) {}
template<typename T>
OverrideStream<Stream>& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
template<typename T>
OverrideStream<Stream>& operator>>(T&& obj)
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
void write(const char* pch, size_t nSize)
{
stream->write(pch, nSize);
}
void read(char* pch, size_t nSize)
{
stream->read(pch, nSize);
}
int GetVersion() const { return nVersion; }
int GetType() const { return nType; }
};
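/* Usage sketch (illustrative, not part of this header): wrap an existing
 * stream to (de)serialize with a different version than the stream's own.
 * PROTOCOL_VERSION and INIT_PROTO_VERSION are assumed to come from
 * version.h, which callers of this class include.
 *
 *   CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
 *   OverrideStream<CDataStream> os(&ss, ss.GetType(), INIT_PROTO_VERSION);
 *   os << msg; // msg is serialized into ss using INIT_PROTO_VERSION
 */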
/* Minimal stream for overwriting and/or appending to an existing byte vector
*
* The referenced vector will grow as necessary
*/
class CVectorWriter
{
public:
/*
* @param[in] nTypeIn Serialization Type
* @param[in] nVersionIn Serialization Version (including any flags)
* @param[in] vchDataIn Referenced byte vector to overwrite/append
* @param[in] nPosIn Starting position. Vector index where writes should start. The vector will initially
* grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size().
*/
CVectorWriter(int nTypeIn, int nVersionIn, std::vector<unsigned char>& vchDataIn, size_t nPosIn) : nType(nTypeIn), nVersion(nVersionIn), vchData(vchDataIn), nPos(nPosIn)
{
if(nPos > vchData.size())
vchData.resize(nPos);
}
/*
* (other params same as above)
* @param[in] args A list of items to serialize starting at nPosIn.
*/
template <typename... Args>
CVectorWriter(int nTypeIn, int nVersionIn, std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : CVectorWriter(nTypeIn, nVersionIn, vchDataIn, nPosIn)
{
::SerializeMany(*this, std::forward<Args>(args)...);
}
void write(const char* pch, size_t nSize)
{
assert(nPos <= vchData.size());
size_t nOverwrite = std::min(nSize, vchData.size() - nPos);
if (nOverwrite) {
memcpy(vchData.data() + nPos, reinterpret_cast<const unsigned char*>(pch), nOverwrite);
}
if (nOverwrite < nSize) {
vchData.insert(vchData.end(), reinterpret_cast<const unsigned char*>(pch) + nOverwrite, reinterpret_cast<const unsigned char*>(pch) + nSize);
}
nPos += nSize;
}
template<typename T>
CVectorWriter& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
int GetVersion() const
{
return nVersion;
}
int GetType() const
{
return nType;
}
void seek(size_t nSize)
{
nPos += nSize;
if(nPos > vchData.size())
vchData.resize(nPos);
}
size_t size() const
{
return vchData.size() - nPos;
}
private:
const int nType;
const int nVersion;
std::vector<unsigned char>& vchData;
size_t nPos;
};
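/* Usage sketch (illustrative): append two serialized fields to an existing
 * byte vector. SER_NETWORK is defined in serialize.h; PROTOCOL_VERSION is
 * assumed to come from version.h.
 *
 *   std::vector<unsigned char> vch;
 *   CVectorWriter(SER_NETWORK, PROTOCOL_VERSION, vch, vch.size(), uint8_t{1}, uint16_t{2});
 *   // vch now holds the serializations of both values, back to back
 */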
/** Minimal stream for reading from an existing vector by reference
*/
class VectorReader
{
private:
const int m_type;
const int m_version;
const std::vector<unsigned char>& m_data;
size_t m_pos = 0;
public:
/*
* @param[in] type Serialization Type
* @param[in] version Serialization Version (including any flags)
* @param[in] data Referenced byte vector to read from
* @param[in] pos Starting position. Vector index where reads should start.
*/
VectorReader(int type, int version, const std::vector<unsigned char>& data, size_t pos)
: m_type(type), m_version(version), m_data(data)
{
seek(pos);
}
/*
* (other params same as above)
* @param[in] args A list of items to deserialize starting at pos.
*/
template <typename... Args>
VectorReader(int type, int version, const std::vector<unsigned char>& data, size_t pos,
Args&&... args)
: VectorReader(type, version, data, pos)
{
::UnserializeMany(*this, std::forward<Args>(args)...);
}
template<typename T>
VectorReader& operator>>(T& obj)
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
int GetVersion() const { return m_version; }
int GetType() const { return m_type; }
size_t size() const { return m_data.size() - m_pos; }
bool empty() const { return m_data.size() == m_pos; }
void read(char* dst, size_t n)
{
if (n == 0) {
return;
}
// Read from the beginning of the buffer
size_t pos_next = m_pos + n;
if (pos_next > m_data.size()) {
throw std::ios_base::failure("VectorReader::read(): end of data");
}
memcpy(dst, m_data.data() + m_pos, n);
m_pos = pos_next;
}
void seek(size_t n)
{
m_pos += n;
if (m_pos > m_data.size()) {
throw std::ios_base::failure("VectorReader::seek(): end of data");
}
}
};
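/* Usage sketch (illustrative): read fields back out of a byte vector, e.g.
 * one filled by CVectorWriter above. PROTOCOL_VERSION is assumed to come
 * from version.h.
 *
 *   uint8_t a;
 *   uint16_t b;
 *   VectorReader(SER_NETWORK, PROTOCOL_VERSION, vch, 0, a, b);
 *   // or, incrementally:
 *   VectorReader reader(SER_NETWORK, PROTOCOL_VERSION, vch, 0);
 *   reader >> a >> b;
 */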
/** Double ended buffer combining vector and stream-like interfaces.
*
* >> and << read and write unformatted data using the above serialization templates.
* Fills with data in linear time; some stringstream implementations take N^2 time.
*/
class CDataStream
{
protected:
typedef CSerializeData vector_type;
vector_type vch;
unsigned int nReadPos;
int nType;
int nVersion;
public:
typedef vector_type::allocator_type allocator_type;
typedef vector_type::size_type size_type;
typedef vector_type::difference_type difference_type;
typedef vector_type::reference reference;
typedef vector_type::const_reference const_reference;
typedef vector_type::value_type value_type;
typedef vector_type::iterator iterator;
typedef vector_type::const_iterator const_iterator;
typedef vector_type::reverse_iterator reverse_iterator;
explicit CDataStream(int nTypeIn, int nVersionIn)
{
Init(nTypeIn, nVersionIn);
}
CDataStream(const_iterator pbegin, const_iterator pend, int nTypeIn, int nVersionIn) : vch(pbegin, pend)
{
Init(nTypeIn, nVersionIn);
}
CDataStream(const char* pbegin, const char* pend, int nTypeIn, int nVersionIn) : vch(pbegin, pend)
{
Init(nTypeIn, nVersionIn);
}
CDataStream(const vector_type& vchIn, int nTypeIn, int nVersionIn) : vch(vchIn.begin(), vchIn.end())
{
Init(nTypeIn, nVersionIn);
}
CDataStream(const std::vector<char>& vchIn, int nTypeIn, int nVersionIn) : vch(vchIn.begin(), vchIn.end())
{
Init(nTypeIn, nVersionIn);
}
CDataStream(const std::vector<unsigned char>& vchIn, int nTypeIn, int nVersionIn) : vch(vchIn.begin(), vchIn.end())
{
Init(nTypeIn, nVersionIn);
}
template <typename... Args>
CDataStream(int nTypeIn, int nVersionIn, Args&&... args)
{
Init(nTypeIn, nVersionIn);
::SerializeMany(*this, std::forward<Args>(args)...);
}
void Init(int nTypeIn, int nVersionIn)
{
nReadPos = 0;
nType = nTypeIn;
nVersion = nVersionIn;
}
CDataStream& operator+=(const CDataStream& b)
{
vch.insert(vch.end(), b.begin(), b.end());
return *this;
}
friend CDataStream operator+(const CDataStream& a, const CDataStream& b)
{
CDataStream ret = a;
ret += b;
return (ret);
}
std::string str() const
{
return (std::string(begin(), end()));
}
//
// Vector subset
//
const_iterator begin() const { return vch.begin() + nReadPos; }
iterator begin() { return vch.begin() + nReadPos; }
const_iterator end() const { return vch.end(); }
iterator end() { return vch.end(); }
size_type size() const { return vch.size() - nReadPos; }
bool empty() const { return vch.size() == nReadPos; }
void resize(size_type n, value_type c=0) { vch.resize(n + nReadPos, c); }
void reserve(size_type n) { vch.reserve(n + nReadPos); }
const_reference operator[](size_type pos) const { return vch[pos + nReadPos]; }
reference operator[](size_type pos) { return vch[pos + nReadPos]; }
void clear() { vch.clear(); nReadPos = 0; }
iterator insert(iterator it, const char x=char()) { return vch.insert(it, x); }
void insert(iterator it, size_type n, const char x) { vch.insert(it, n, x); }
value_type* data() { return vch.data() + nReadPos; }
const value_type* data() const { return vch.data() + nReadPos; }
void insert(iterator it, std::vector<char>::const_iterator first, std::vector<char>::const_iterator last)
{
if (last == first) return;
assert(last - first > 0);
if (it == vch.begin() + nReadPos && (unsigned int)(last - first) <= nReadPos)
{
// special case for inserting at the front when there's room
nReadPos -= (last - first);
memcpy(&vch[nReadPos], &first[0], last - first);
}
else
vch.insert(it, first, last);
}
void insert(iterator it, const char* first, const char* last)
{
if (last == first) return;
assert(last - first > 0);
if (it == vch.begin() + nReadPos && (unsigned int)(last - first) <= nReadPos)
{
// special case for inserting at the front when there's room
nReadPos -= (last - first);
memcpy(&vch[nReadPos], &first[0], last - first);
}
else
vch.insert(it, first, last);
}
iterator erase(iterator it)
{
if (it == vch.begin() + nReadPos)
{
// special case for erasing from the front
if (++nReadPos >= vch.size())
{
// whenever we reach the end, we take the opportunity to clear the buffer
nReadPos = 0;
return vch.erase(vch.begin(), vch.end());
}
return vch.begin() + nReadPos;
}
else
return vch.erase(it);
}
iterator erase(iterator first, iterator last)
{
if (first == vch.begin() + nReadPos)
{
// special case for erasing from the front
if (last == vch.end())
{
nReadPos = 0;
return vch.erase(vch.begin(), vch.end());
}
else
{
nReadPos = (last - vch.begin());
return last;
}
}
else
return vch.erase(first, last);
}
inline void Compact()
{
vch.erase(vch.begin(), vch.begin() + nReadPos);
nReadPos = 0;
}
bool Rewind(size_type n)
{
// Rewind by n characters if the buffer hasn't been compacted yet
if (n > nReadPos)
return false;
nReadPos -= n;
return true;
}
//
// Stream subset
//
bool eof() const { return size() == 0; }
CDataStream* rdbuf() { return this; }
int in_avail() const { return size(); }
void SetType(int n) { nType = n; }
int GetType() const { return nType; }
void SetVersion(int n) { nVersion = n; }
int GetVersion() const { return nVersion; }
void read(char* pch, size_t nSize)
{
if (nSize == 0) return;
// Read from the beginning of the buffer
unsigned int nReadPosNext = nReadPos + nSize;
if (nReadPosNext > vch.size()) {
throw std::ios_base::failure("CDataStream::read(): end of data");
}
memcpy(pch, &vch[nReadPos], nSize);
if (nReadPosNext == vch.size())
{
nReadPos = 0;
vch.clear();
return;
}
nReadPos = nReadPosNext;
}
void ignore(int nSize)
{
// Ignore from the beginning of the buffer
if (nSize < 0) {
throw std::ios_base::failure("CDataStream::ignore(): nSize negative");
}
unsigned int nReadPosNext = nReadPos + nSize;
if (nReadPosNext >= vch.size())
{
if (nReadPosNext > vch.size())
throw std::ios_base::failure("CDataStream::ignore(): end of data");
nReadPos = 0;
vch.clear();
return;
}
nReadPos = nReadPosNext;
}
void write(const char* pch, size_t nSize)
{
// Write to the end of the buffer
vch.insert(vch.end(), pch, pch + nSize);
}
template<typename Stream>
void Serialize(Stream& s) const
{
// Special case: stream << stream concatenates like stream += stream
if (!vch.empty())
s.write((char*)vch.data(), vch.size() * sizeof(value_type));
}
template<typename T>
CDataStream& operator<<(const T& obj)
{
// Serialize to this stream
::Serialize(*this, obj);
return (*this);
}
template<typename T>
CDataStream& operator>>(T&& obj)
{
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
void GetAndClear(CSerializeData &d) {
d.insert(d.end(), begin(), end());
clear();
}
/**
* XOR the contents of this stream with a certain key.
*
* @param[in] key The key used to XOR the data in this stream.
*/
void Xor(const std::vector<unsigned char>& key)
{
if (key.size() == 0) {
return;
}
for (size_type i = 0, j = 0; i != size(); i++) {
vch[i] ^= key[j++];
// This potentially acts on very many bytes of data, so it's
// important that we calculate `j`, i.e. the `key` index in this
// way instead of doing a %, which would effectively be a division
// for each byte Xor'd -- much slower than need be.
if (j == key.size())
j = 0;
}
}
};
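/* Usage sketch (illustrative): CDataStream as a general serialize/
 * deserialize buffer. PROTOCOL_VERSION is assumed to come from version.h.
 *
 *   CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
 *   ss << uint32_t{42} << std::string("hello");
 *   uint32_t n;
 *   std::string s;
 *   ss >> n >> s; // n == 42, s == "hello"; the read bytes are consumed
 */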
template <typename IStream>
class BitStreamReader
{
private:
IStream& m_istream;
/// Buffered byte read in from the input stream. A new byte is read into the
/// buffer when m_offset reaches 8.
uint8_t m_buffer{0};
/// Number of high order bits in m_buffer already returned by previous
/// Read() calls. The next bit to be returned is at this offset from the
/// most significant bit position.
int m_offset{8};
public:
explicit BitStreamReader(IStream& istream) : m_istream(istream) {}
/** Read the specified number of bits from the stream. The data is returned
* in the nbits least significant bits of a 64-bit uint.
*/
uint64_t Read(int nbits) {
if (nbits < 0 || nbits > 64) {
throw std::out_of_range("nbits must be between 0 and 64");
}
uint64_t data = 0;
while (nbits > 0) {
if (m_offset == 8) {
m_istream >> m_buffer;
m_offset = 0;
}
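// Extract the next chunk: shift out the bits of the buffered byte that
// were already consumed, keep the top `bits` bits of what remains, and
// append them to the low end of the accumulated result.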
int bits = std::min(8 - m_offset, nbits);
data <<= bits;
data |= static_cast<uint8_t>(m_buffer << m_offset) >> (8 - bits);
m_offset += bits;
nbits -= bits;
}
return data;
}
};
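/* Usage sketch (illustrative): decode two bit fields from a byte vector,
 * as the BIP 158 Golomb-Rice decoder does. The type/version values are
 * placeholders; raw byte reads do not depend on them.
 *
 *   VectorReader vr(SER_NETWORK, 0, data, 0);
 *   BitStreamReader<VectorReader> bits(vr);
 *   uint64_t quotient = bits.Read(3);
 *   uint64_t remainder = bits.Read(13);
 */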
template <typename OStream>
class BitStreamWriter
{
private:
OStream& m_ostream;
/// Buffered byte waiting to be written to the output stream. The byte is
/// written to the stream when m_offset reaches 8 or Flush() is called.
uint8_t m_buffer{0};
/// Number of high order bits in m_buffer already written by previous
/// Write() calls and not yet flushed to the stream. The next bit to be
/// written to is at this offset from the most significant bit position.
int m_offset{0};
public:
explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {}
~BitStreamWriter()
{
Flush();
}
/** Write the nbits least significant bits of a 64-bit int to the output
* stream. Data is buffered until it completes an octet.
*/
void Write(uint64_t data, int nbits) {
if (nbits < 0 || nbits > 64) {
throw std::out_of_range("nbits must be between 0 and 64");
}
while (nbits > 0) {
int bits = std::min(8 - m_offset, nbits);
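// Left-align the nbits field at the top of the 64-bit word, then shift
// its leading `bits` bits down so they land just below the m_offset bits
// already occupied in the buffer; the rest is written on later iterations.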
m_buffer |= (data << (64 - nbits)) >> (64 - 8 + m_offset);
m_offset += bits;
nbits -= bits;
if (m_offset == 8) {
Flush();
}
}
}
/** Flush any unwritten bits to the output stream, padding with 0's to the
* next byte boundary.
*/
void Flush() {
if (m_offset == 0) {
return;
}
m_ostream << m_buffer;
m_buffer = 0;
m_offset = 0;
}
};
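/* Usage sketch (illustrative): write bit fields, then read them back.
 *
 *   CDataStream ss(SER_NETWORK, 0);
 *   {
 *       BitStreamWriter<CDataStream> writer(ss);
 *       writer.Write(0x5, 3);    // the three bits 101
 *       writer.Write(0x1f3, 13); // a thirteen-bit field
 *   } // destructor flushes, zero-padding to the next byte boundary
 *   BitStreamReader<CDataStream> reader(ss);
 *   uint64_t a = reader.Read(3);  // == 0x5
 *   uint64_t b = reader.Read(13); // == 0x1f3
 */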
/** Non-refcounted RAII wrapper for FILE*
*
* Will automatically close the file when it goes out of scope if not null.
* If you're returning the file pointer, return file.release().
* If you need to close the file early, use file.fclose() instead of fclose(file).
*/
class CAutoFile
{
private:
const int nType;
const int nVersion;
FILE* file;
public:
CAutoFile(FILE* filenew, int nTypeIn, int nVersionIn) : nType(nTypeIn), nVersion(nVersionIn)
{
file = filenew;
}
~CAutoFile()
{
fclose();
}
// Disallow copies
CAutoFile(const CAutoFile&) = delete;
CAutoFile& operator=(const CAutoFile&) = delete;
void fclose()
{
if (file) {
::fclose(file);
file = nullptr;
}
}
/** Get wrapped FILE* with transfer of ownership.
* @note This will invalidate the CAutoFile object, and makes it the responsibility of the caller
* of this function to clean up the returned FILE*.
*/
FILE* release() { FILE* ret = file; file = nullptr; return ret; }
/** Get wrapped FILE* without transfer of ownership.
* @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
* CAutoFile outlives use of the passed pointer.
*/
FILE* Get() const { return file; }
/** Return true if the wrapped FILE* is nullptr, false otherwise.
*/
bool IsNull() const { return (file == nullptr); }
//
// Stream subset
//
int GetType() const { return nType; }
int GetVersion() const { return nVersion; }
void read(char* pch, size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::read: file handle is nullptr");
if (fread(pch, 1, nSize, file) != nSize)
throw std::ios_base::failure(feof(file) ? "CAutoFile::read: end of file" : "CAutoFile::read: fread failed");
}
void ignore(size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::ignore: file handle is nullptr");
unsigned char data[4096];
while (nSize > 0) {
size_t nNow = std::min<size_t>(nSize, sizeof(data));
if (fread(data, 1, nNow, file) != nNow)
throw std::ios_base::failure(feof(file) ? "CAutoFile::ignore: end of file" : "CAutoFile::ignore: fread failed");
nSize -= nNow;
}
}
void write(const char* pch, size_t nSize)
{
if (!file)
throw std::ios_base::failure("CAutoFile::write: file handle is nullptr");
if (fwrite(pch, 1, nSize, file) != nSize)
throw std::ios_base::failure("CAutoFile::write: write failed");
}
template<typename T>
CAutoFile& operator<<(const T& obj)
{
// Serialize to this stream
if (!file)
throw std::ios_base::failure("CAutoFile::operator<<: file handle is nullptr");
::Serialize(*this, obj);
return (*this);
}
template<typename T>
CAutoFile& operator>>(T&& obj)
{
// Unserialize from this stream
if (!file)
throw std::ios_base::failure("CAutoFile::operator>>: file handle is nullptr");
::Unserialize(*this, obj);
return (*this);
}
};
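/* Usage sketch (illustrative): serialize an object straight to disk. The
 * path and obj are placeholders; SER_DISK is defined in serialize.h and
 * CLIENT_VERSION is assumed to come from clientversion.h.
 *
 *   CAutoFile file(fopen("example.dat", "wb"), SER_DISK, CLIENT_VERSION);
 *   if (!file.IsNull()) {
 *       file << obj; // throws std::ios_base::failure on a short write
 *   }
 *   // the FILE* is fclose()d automatically when file goes out of scope
 */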
/** Non-refcounted RAII wrapper around a FILE* that implements a ring buffer to
* deserialize from. It guarantees the ability to rewind a given number of bytes.
*
* Will automatically close the file when it goes out of scope if not null.
* If you need to close the file early, use file.fclose() instead of fclose(file).
*/
class CBufferedFile
{
private:
const int nType;
const int nVersion;
FILE *src; // source file
uint64_t nSrcPos; // how many bytes have been read from source
uint64_t nReadPos; // how many bytes have been read from this
uint64_t nReadLimit; // up to which position we're allowed to read
uint64_t nRewind; // how many bytes we guarantee to rewind
std::vector<char> vchBuf; // the buffer
protected:
// read data from the source to fill the buffer
bool Fill() {
unsigned int pos = nSrcPos % vchBuf.size();
unsigned int readNow = vchBuf.size() - pos;
unsigned int nAvail = vchBuf.size() - (nSrcPos - nReadPos) - nRewind;
if (nAvail < readNow)
readNow = nAvail;
if (readNow == 0)
return false;
size_t nBytes = fread((void*)&vchBuf[pos], 1, readNow, src);
if (nBytes == 0) {
throw std::ios_base::failure(feof(src) ? "CBufferedFile::Fill: end of file" : "CBufferedFile::Fill: fread failed");
} else {
nSrcPos += nBytes;
return true;
}
}
public:
CBufferedFile(FILE *fileIn, uint64_t nBufSize, uint64_t nRewindIn, int nTypeIn, int nVersionIn) :
nType(nTypeIn), nVersion(nVersionIn), nSrcPos(0), nReadPos(0), nReadLimit((uint64_t)(-1)), nRewind(nRewindIn), vchBuf(nBufSize, 0)
{
src = fileIn;
}
~CBufferedFile()
{
fclose();
}
// Disallow copies
CBufferedFile(const CBufferedFile&) = delete;
CBufferedFile& operator=(const CBufferedFile&) = delete;
int GetVersion() const { return nVersion; }
int GetType() const { return nType; }
void fclose()
{
if (src) {
::fclose(src);
src = nullptr;
}
}
// check whether we're at the end of the source file
bool eof() const {
return nReadPos == nSrcPos && feof(src);
}
// read a number of bytes
void read(char *pch, size_t nSize) {
if (nSize + nReadPos > nReadLimit)
throw std::ios_base::failure("Read attempted past buffer limit");
if (nSize + nRewind > vchBuf.size())
throw std::ios_base::failure("Read larger than buffer size");
while (nSize > 0) {
if (nReadPos == nSrcPos)
Fill();
unsigned int pos = nReadPos % vchBuf.size();
size_t nNow = nSize;
if (nNow + pos > vchBuf.size())
nNow = vchBuf.size() - pos;
if (nNow + nReadPos > nSrcPos)
nNow = nSrcPos - nReadPos;
memcpy(pch, &vchBuf[pos], nNow);
nReadPos += nNow;
pch += nNow;
nSize -= nNow;
}
}
// return the current reading position
uint64_t GetPos() const {
return nReadPos;
}
// rewind to a given reading position
bool SetPos(uint64_t nPos) {
nReadPos = nPos;
if (nReadPos + nRewind < nSrcPos) {
nReadPos = nSrcPos - nRewind;
return false;
} else if (nReadPos > nSrcPos) {
nReadPos = nSrcPos;
return false;
} else {
return true;
}
}
bool Seek(uint64_t nPos) {
long nLongPos = nPos;
if (nPos != (uint64_t)nLongPos)
return false;
if (fseek(src, nLongPos, SEEK_SET))
return false;
nLongPos = ftell(src);
nSrcPos = nLongPos;
nReadPos = nLongPos;
return true;
}
// prevent reading beyond a certain position
// no argument removes the limit
bool SetLimit(uint64_t nPos = (uint64_t)(-1)) {
if (nPos < nReadPos)
return false;
nReadLimit = nPos;
return true;
}
template<typename T>
CBufferedFile& operator>>(T&& obj) {
// Unserialize from this stream
::Unserialize(*this, obj);
return (*this);
}
// search for a given byte in the stream, and remain positioned on it
void FindByte(char ch) {
while (true) {
if (nReadPos == nSrcPos)
Fill();
if (vchBuf[nReadPos % vchBuf.size()] == ch)
break;
nReadPos++;
}
}
};
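/* Usage sketch (illustrative): scan a block file for a marker byte and
 * remember the position, with up to 1 KiB of guaranteed rewind. The file
 * name and marker byte are placeholders; CLIENT_VERSION is assumed to come
 * from clientversion.h.
 *
 *   CBufferedFile blkdat(fopen("blk00000.dat", "rb"), 2 * 1024 * 1024, 1024, SER_DISK, CLIENT_VERSION);
 *   blkdat.FindByte('\xf9');        // stop on the next marker byte
 *   uint64_t pos = blkdat.GetPos();
 *   // ... read/deserialize, then rewind if needed:
 *   blkdat.SetPos(pos);             // succeeds while within the rewind window
 */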
#endif // BITCOIN_STREAMS_H