dash/src/txmempool.cpp

// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <txmempool.h>

#include <consensus/consensus.h>
#include <consensus/tx_verify.h>
#include <consensus/validation.h>
#include <hash.h>
#include <policy/fees.h>
#include <policy/policy.h>
#include <policy/settings.h>
#include <reverse_iterator.h>
#include <util/check.h>
#include <util/moneystr.h>
#include <util/system.h>
#include <util/time.h>
#include <validation.h>
#include <validationinterface.h>
#include <evo/specialtx.h>
#include <evo/assetlocktx.h>
#include <evo/providertx.h>
#include <evo/deterministicmns.h>
#include <llmq/instantsend.h>

#include <cmath>
#include <optional>
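
// A newly added entry starts out as the only member of both its ancestor and
// descendant sets, so the *WithAncestors / *WithDescendants totals below begin
// at the entry's own size, fee and sigop count.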
CTxMemPoolEntry::CTxMemPoolEntry(const CTransactionRef& _tx, const CAmount& _nFee,
                                 int64_t _nTime, unsigned int _entryHeight,
                                 bool _spendsCoinbase, unsigned int _sigOps, LockPoints lp)
    : tx(_tx), nFee(_nFee), nTxSize(tx->GetTotalSize()), nUsageSize(RecursiveDynamicUsage(tx)), nTime(_nTime), entryHeight(_entryHeight),
      spendsCoinbase(_spendsCoinbase), sigOpCount(_sigOps), lockPoints(lp)
{
    nCountWithDescendants = 1;
    nSizeWithDescendants = GetTxSize();
    nModFeesWithDescendants = nFee;
    feeDelta = 0;
    nCountWithAncestors = 1;
    nSizeWithAncestors = GetTxSize();
    nModFeesWithAncestors = nFee;
    nSigOpCountWithAncestors = sigOpCount;
}
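
// Changing the fee delta (the user-supplied priority adjustment applied on top
// of the real fee) has to be reflected in both the ancestor and descendant
// modified-fee totals of this entry.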
void CTxMemPoolEntry::UpdateFeeDelta(int64_t newFeeDelta)
{
    nModFeesWithDescendants += newFeeDelta - feeDelta;
    nModFeesWithAncestors += newFeeDelta - feeDelta;
    feeDelta = newFeeDelta;
}

void CTxMemPoolEntry::UpdateLockPoints(const LockPoints& lp)
{
    lockPoints = lp;
}
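
// GetTxSize() reports the virtual transaction size, which factors in the
// entry's sigop count as well as its serialized size.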
size_t CTxMemPoolEntry::GetTxSize() const
{
    return GetVirtualTransactionSize(nTxSize, sigOpCount);
}

// Update the given tx for any in-mempool descendants.
// Assumes that CTxMemPool::m_children is correct for the given tx and all
// descendants.
void CTxMemPool::UpdateForDescendants(txiter updateIt, cacheMap &cachedDescendants, const std::set<uint256> &setExclude)
{
    CTxMemPoolEntry::Children stageEntries, descendants;
    stageEntries = updateIt->GetMemPoolChildrenConst();
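
    // Iteratively walk the in-mempool descendants of updateIt: stageEntries holds
    // children that still need to be visited, while descendants accumulates every
    // descendant discovered so far.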
    while (!stageEntries.empty()) {
        const CTxMemPoolEntry& descendant = *stageEntries.begin();
        descendants.insert(descendant);
        stageEntries.erase(descendant);
        const CTxMemPoolEntry::Children& children = descendant.GetMemPoolChildrenConst();
        for (const CTxMemPoolEntry& childEntry : children) {
            cacheMap::iterator cacheIt = cachedDescendants.find(mapTx.iterator_to(childEntry));
            if (cacheIt != cachedDescendants.end()) {
                // We've already calculated this one, just add the entries for this set
                // but don't traverse again.
                for (txiter cacheEntry : cacheIt->second) {
                    descendants.insert(*cacheEntry);
                }
} else if (!descendants.count(childEntry)) {
// Schedule for later processing
stageEntries.insert(childEntry);
}
}
}
// descendants now contains all in-mempool descendants of updateIt.
// Update and add to cached descendant map
int64_t modifySize = 0;
CAmount modifyFee = 0;
int64_t modifyCount = 0;
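// modifySize/modifyFee/modifyCount accumulate what the descendants found above
// (excluding those already accounted for via setExclude) contribute to
// updateIt's descendant state; the totals are applied below.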
for (const CTxMemPoolEntry& descendant : descendants) {
if (!setExclude.count(descendant.GetTx().GetHash())) {
modifySize += descendant.GetTxSize();
modifyFee += descendant.GetModifiedFee();
modifyCount++;
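// mapTx.iterator_to() recovers a txiter from the entry reference so it can be
// stored in the descendant cache and passed to mapTx.modify() below.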
cachedDescendants[updateIt].insert(mapTx.iterator_to(descendant));
// Update ancestor state for each descendant
mapTx.modify(mapTx.iterator_to(descendant), update_ancestor_state(updateIt->GetTxSize(), updateIt->GetModifiedFee(), 1, updateIt->GetSigOpCount()));
}
}
mapTx.modify(updateIt, update_descendant_state(modifySize, modifyFee, modifyCount));
}
// vHashesToUpdate is the set of transaction hashes from a disconnected block
// which has been re-added to the mempool.
// for each entry, look for descendants that are outside vHashesToUpdate, and
// add fee/size information for such descendants to the parent.
// for each such descendant, also update the ancestor state to include the parent.
void CTxMemPool::UpdateTransactionsFromBlock(const std::vector<uint256> &vHashesToUpdate)
{
AssertLockHeld(cs);
// For each entry in vHashesToUpdate, store the set of in-mempool, but not
// in-vHashesToUpdate transactions, so that we don't have to recalculate
// descendants when we come across a previously seen entry.
cacheMap mapMemPoolDescendantsToUpdate;
// Use a set for lookups into vHashesToUpdate (these entries are already
// accounted for in the state of their ancestors)
std::set<uint256> setAlreadyIncluded(vHashesToUpdate.begin(), vHashesToUpdate.end());
// Iterate in reverse, so that whenever we are looking at a transaction
// we are sure that all in-mempool descendants have already been processed.
// This maximizes the benefit of the descendant cache and guarantees that
// CTxMemPoolEntry::m_children will be updated, an assumption made in
// UpdateForDescendants.
for (const uint256 &hash : reverse_iterate(vHashesToUpdate)) {
// calculate children from mapNextTx
txiter it = mapTx.find(hash);
if (it == mapTx.end()) {
continue;
}
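// mapNextTx is keyed by the outpoints being spent, so every in-mempool
// transaction spending an output of this tx sorts at or after
// COutPoint(hash, 0); the loop below walks exactly those entries.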
auto iter = mapNextTx.lower_bound(COutPoint(hash, 0));
// First calculate the children, and update CTxMemPoolEntry::m_children to
// include them, and update their CTxMemPoolEntry::m_parents to include this tx.
// we cache the in-mempool children to avoid duplicate updates
{
WITH_FRESH_EPOCH(m_epoch);
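// Within this fresh epoch, visited(childIter) marks an entry the first time
// it is seen and returns true on later encounters, so a child spending
// several outputs of this tx is only linked once.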
for (; iter != mapNextTx.end() && iter->first->hash == hash; ++iter) {
const uint256 &childHash = iter->second->GetHash();
txiter childIter = mapTx.find(childHash);
assert(childIter != mapTx.end());
// We can skip updating entries we've encountered before or that
// are in the block (which are already accounted for).
if (!visited(childIter) && !setAlreadyIncluded.count(childHash)) {
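// Link the two entries in both directions: childIter becomes a child
// of this tx, and this tx becomes a parent of childIter.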
UpdateChild(it, childIter, true);
UpdateParent(childIter, it, true);
}
}
} // release epoch guard for UpdateForDescendants
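// With this tx's child links in place, propagate its size/fee/sigops to all
// in-mempool descendants outside the block, and record those descendants in
// mapMemPoolDescendantsToUpdate for reuse by later (ancestor) iterations.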
UpdateForDescendants(it, mapMemPoolDescendantsToUpdate, setAlreadyIncluded);
}
}
bool CTxMemPool::CalculateMemPoolAncestors(const CTxMemPoolEntry &entry, setEntries &setAncestors, uint64_t limitAncestorCount, uint64_t limitAncestorSize, uint64_t limitDescendantCount, uint64_t limitDescendantSize, std::string &errString, bool fSearchForParents /* = true */) const
{
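// staged_ancestors is seeded with the entry's direct in-mempool parents;
// the remainder of CalculateMemPoolAncestors expands it transitively while
// enforcing the ancestor/descendant limits passed in.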
CTxMemPoolEntry::Parents staged_ancestors;
const CTransaction &tx = entry.GetTx();
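// staged_ancestors is the worklist of in-mempool parents still to be processed;
// setAncestors accumulates the full in-mempool ancestry of this entry.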
if (fSearchForParents) {
// Get parents of this transaction that are in the mempool
// GetMemPoolParents() is only valid for entries in the mempool, so we
// iterate mapTx to find parents.
for (unsigned int i = 0; i < tx.vin.size(); i++) {
std::optional<txiter> piter = GetIter(tx.vin[i].prevout.hash);
if (piter) {
staged_ancestors.insert(**piter);
if (staged_ancestors.size() + 1 > limitAncestorCount) {
errString = strprintf("too many unconfirmed parents [limit: %u]", limitAncestorCount);
return false;
}
}
}
} else {
// If we're not searching for parents, we require this to be an
// entry in the mempool already.
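// mapTx.iterator_to() recovers the multi_index iterator for an entry we
// already hold a reference to, avoiding a hash lookup.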
txiter it = mapTx.iterator_to(entry);
staged_ancestors = it->GetMemPoolParentsConst();
}
size_t totalSizeWithAncestors = entry.GetTxSize();
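// Walk up the ancestry graph: move each staged entry into setAncestors and
// stage its own in-mempool parents, failing as soon as a package limit would
// be exceeded.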
while (!staged_ancestors.empty()) {
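// Parents holds std::reference_wrapper<const CTxMemPoolEntry>, so get()
// yields the underlying entry.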
const CTxMemPoolEntry& stage = staged_ancestors.begin()->get();
txiter stageit = mapTx.iterator_to(stage);
setAncestors.insert(stageit);
staged_ancestors.erase(stage);
totalSizeWithAncestors += stageit->GetTxSize();
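// Admitting this entry must not push any existing ancestor past its
// descendant limits, nor the combined package past the ancestor size limit.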
if (stageit->GetSizeWithDescendants() + entry.GetTxSize() > limitDescendantSize) {
errString = strprintf("exceeds descendant size limit for tx %s [limit: %u]", stageit->GetTx().GetHash().ToString(), limitDescendantSize);
return false;
} else if (stageit->GetCountWithDescendants() + 1 > limitDescendantCount) {
errString = strprintf("too many descendants for tx %s [limit: %u]", stageit->GetTx().GetHash().ToString(), limitDescendantCount);
return false;
} else if (totalSizeWithAncestors > limitAncestorSize) {
errString = strprintf("exceeds ancestor size limit [limit: %u]", limitAncestorSize);
return false;
}
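// Stage any parents of this ancestor that have not been collected yet, so
// the traversal continues up the graph.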
const CTxMemPoolEntry::Parents& parents = stageit->GetMemPoolParentsConst();
for (const CTxMemPoolEntry& parent : parents) {
txiter parent_it = mapTx.iterator_to(parent);
// If this is a new ancestor, add it.
if (setAncestors.count(parent_it) == 0) {
staged_ancestors.insert(parent);
}
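// The ancestor count limit covers the entry itself plus everything already
// collected and everything still staged.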
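// Enforce the package ancestor-count limit; the +1 accounts for the
// transaction under consideration itself.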
if (staged_ancestors.size() + setAncestors.size() + 1 > limitAncestorCount) {
errString = strprintf("too many unconfirmed ancestors [limit: %u]", limitAncestorCount);
return false;
}
}
}
return true;
}
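// Register or unregister `it` as a child of each of its in-mempool parents,
// and adjust the descendant state (count, size, modified fee) of every
// ancestor in setAncestors by this entry's contribution.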
void CTxMemPool::UpdateAncestorsOf(bool add, txiter it, setEntries &setAncestors)
{
CTxMemPoolEntry::Parents parents = it->GetMemPoolParents();
// add or remove this tx as a child of each parent
for (const CTxMemPoolEntry& parent : parents) {
UpdateChild(mapTx.iterator_to(parent), it, add);
}
const int64_t updateCount = (add ? 1 : -1);
const int64_t updateSize = updateCount * it->GetTxSize();
const CAmount updateFee = updateCount * it->GetModifiedFee();
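// Apply this entry's size/fee/count delta to the descendant state of each
// ancestor; the sign of the delta follows `add`.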
for (txiter ancestorIt : setAncestors) {
mapTx.modify(ancestorIt, update_descendant_state(updateSize, updateFee, updateCount));
}
}
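// Fold the aggregate size, modified fee, count and sigop count of
// setAncestors into the cached ancestor state of `it`.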
void CTxMemPool::UpdateEntryForAncestors(txiter it, const setEntries &setAncestors)
{
int64_t updateCount = setAncestors.size();
int64_t updateSize = 0;
CAmount updateFee = 0;
int updateSigOps = 0;
for (txiter ancestorIt : setAncestors) {
updateSize += ancestorIt->GetTxSize();
updateFee += ancestorIt->GetModifiedFee();
updateSigOps += ancestorIt->GetSigOpCount();
}
mapTx.modify(it, update_ancestor_state(updateSize, updateFee, updateCount, updateSigOps));
}
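// For each in-mempool child of `it`, drop the parent link that points back
// at `it`.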
void CTxMemPool::UpdateChildrenForRemoval(txiter it)
{
const CTxMemPoolEntry::Children& children = it->GetMemPoolChildrenConst();
for (const CTxMemPoolEntry& updateIt : children) {
UpdateParent(mapTx.iterator_to(updateIt), it, false);
}
}
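// Adjust the cached ancestor/descendant state of the transactions that stay
// in the pool and sever the parent/child links that reference the entries in
// entriesToRemove.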
void CTxMemPool::UpdateForRemoveFromMempool(const setEntries &entriesToRemove, bool updateDescendants)
{
// For each entry, walk back all ancestors and decrement size associated with this
// transaction
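// nNoLimit disables the ancestor/descendant package limits for the
// CalculateMemPoolAncestors() call below.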
const uint64_t nNoLimit = std::numeric_limits<uint64_t>::max();
if (updateDescendants) {
// updateDescendants should be true whenever we're not recursively
// removing a tx and all its descendants, eg when a transaction is
// confirmed in a block.
// Here we only update statistics and not data in CTxMemPoolEntry::Parents
// and CTxMemPoolEntry::Children (which we need to preserve until we're
// finished with all operations that need to traverse the mempool).
for (txiter removeIt : entriesToRemove) {
setEntries setDescendants;
CalculateDescendants(removeIt, setDescendants);
setDescendants.erase(removeIt); // don't update state for self
int64_t modifySize = -((int64_t)removeIt->GetTxSize());
CAmount modifyFee = -removeIt->GetModifiedFee();
int modifySigOps = -removeIt->GetSigOpCount();
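// Subtract this entry's size, fee, sigops and one ancestor from the
// ancestor state of every remaining descendant.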
for (txiter dit : setDescendants) {
mapTx.modify(dit, update_ancestor_state(modifySize, modifyFee, -1, modifySigOps));
}
}
}
for (txiter removeIt : entriesToRemove) {
setEntries setAncestors;
const CTxMemPoolEntry &entry = *removeIt;
std::string dummy;
// Since this is a tx that is already in the mempool, we can call CMPA
// with fSearchForParents = false. If the mempool is in a consistent
// state, then using true or false should both be correct, though false
// should be a bit faster.
// However, if we happen to be in the middle of processing a reorg, then
// the mempool can be in an inconsistent state. In this case, the set
// of ancestors reachable via GetMemPoolParents()/GetMemPoolChildren()
// will be the same as the set of ancestors whose packages include this
// transaction, because when we add a new transaction to the mempool in
// addUnchecked(), we assume it has no children, and in the case of a
// reorg where that assumption is false, the in-mempool children aren't
// linked to the in-block tx's until UpdateTransactionsFromBlock() is
// called.
// So if we're being called during a reorg, ie before
// UpdateTransactionsFromBlock() has been called, then
// GetMemPoolParents()/GetMemPoolChildren() will differ from the set of
// mempool parents we'd calculate by searching, and it's important that
// we use the cached notion of ancestor transactions as the set of
// things to update for removal.
CalculateMemPoolAncestors(entry, setAncestors, nNoLimit, nNoLimit, nNoLimit, nNoLimit, dummy, false);
// Note that UpdateAncestorsOf severs the child links that point to
// removeIt in the entries for the parents of removeIt.
UpdateAncestorsOf(false, removeIt, setAncestors);
}
// After updating all the ancestor sizes, we can now sever the link between each
// transaction being removed and any mempool children (ie, update CTxMemPoolEntry::m_parents
// for each direct child of a transaction being removed).
for (txiter removeIt : entriesToRemove) {
UpdateChildrenForRemoval(removeIt);
}
}
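// Adjust this entry's cached descendant statistics (size, modified fees and
// count) by the given deltas; size and count must remain strictly positive.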
void CTxMemPoolEntry::UpdateDescendantState(int64_t modifySize, CAmount modifyFee, int64_t modifyCount)
{
nSizeWithDescendants += modifySize;
assert(int64_t(nSizeWithDescendants) > 0);
nModFeesWithDescendants += modifyFee;
nCountWithDescendants += modifyCount;
assert(int64_t(nCountWithDescendants) > 0);
}
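// Adjust this entry's cached ancestor statistics (size, modified fees, count
// and sigop count) by the given deltas.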
void CTxMemPoolEntry::UpdateAncestorState(int64_t modifySize, CAmount modifyFee, int64_t modifyCount, int64_t modifySigOps)
{
nSizeWithAncestors += modifySize;
assert(int64_t(nSizeWithAncestors) > 0);
nModFeesWithAncestors += modifyFee;
nCountWithAncestors += modifyCount;
assert(int64_t(nCountWithAncestors) > 0);
nSigOpCountWithAncestors += modifySigOps;
assert(int(nSigOpCountWithAncestors) >= 0);
}
CTxMemPool::CTxMemPool(CBlockPolicyEstimator* estimator, int check_ratio)
: m_check_ratio(check_ratio), minerPolicyEstimator(estimator)
{
_clear(); //lock free clear
}
void CTxMemPool::ConnectManagers(gsl::not_null<CDeterministicMNManager*> dmnman)
{
// Do not allow double-initialization
assert(m_dmnman == nullptr);
m_dmnman = dmnman;
}
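// Return whether the given outpoint is already spent by a transaction in the
// mempool, i.e. it appears as a prevout in mapNextTx.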
bool CTxMemPool::isSpent(const COutPoint& outpoint) const
{
LOCK(cs);
return mapNextTx.count(outpoint);
}
unsigned int CTxMemPool::GetTransactionsUpdated() const
{
return nTransactionsUpdated;
}
void CTxMemPool::AddTransactionsUpdated(unsigned int n)
{
nTransactionsUpdated += n;
}
void CTxMemPool::addUnchecked(const CTxMemPoolEntry &entry, setEntries &setAncestors, bool validFeeEstimate)
{
// Add to memory pool without checking anything.
// Used by AcceptToMemoryPool(), which DOES do
// all the appropriate checks.
indexed_transaction_set::iterator newit = mapTx.insert(entry).first;
// Update transaction for any feeDelta created by PrioritiseTransaction
CAmount delta{0};
ApplyDelta(entry.GetTx().GetHash(), delta);
if (delta) {
mapTx.modify(newit, update_fee_delta(delta));
}
// Update cachedInnerUsage to include contained transaction's usage.
// (When we update the entry for in-mempool parents, memory usage will be
// further updated.)
cachedInnerUsage += entry.DynamicMemoryUsage();
const CTransaction& tx = newit->GetTx();
std::set<uint256> setParentTransactions;
for (unsigned int i = 0; i < tx.vin.size(); i++) {
mapNextTx.insert(std::make_pair(&tx.vin[i].prevout, &tx));
setParentTransactions.insert(tx.vin[i].prevout.hash);
}
// Don't bother worrying about child transactions of this one.
// Normal case of a new transaction arriving is that there can't be any
// children, because such children would be orphans.
// An exception to that is if a transaction enters that used to be in a block.
// In that case, our disconnect block logic will call UpdateTransactionsFromBlock
// to clean up the mess we're leaving here.
// Update ancestors with information about this tx
for (const auto& pit : GetIterSet(setParentTransactions)) {
UpdateParent(newit, pit, true);
}
UpdateAncestorsOf(true, newit, setAncestors);
UpdateEntryForAncestors(newit, setAncestors);
nTransactionsUpdated++;
totalTxSize += entry.GetTxSize();
m_total_fee += entry.GetFee();
if (minerPolicyEstimator) {
minerPolicyEstimator->processTransaction(entry, validFeeEstimate);
}
vTxHashes.emplace_back(entry.GetTx().GetHash(), newit);
newit->vTxHashesIdx = vTxHashes.size() - 1;
// Invalid ProTxes should never get this far because transactions should be
// fully checked by AcceptToMemoryPool() at this point, so we just assume that
// everything is fine here.
if (m_dmnman) {
addUncheckedProTx(newit, tx);
}
}
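// Record address index entries for a newly added transaction: a negative delta
// for every input (keyed by the address of the spent prevout) and a positive
// delta for every output, plus a list of the inserted keys so they can be
// erased again in removeAddressIndex().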
void CTxMemPool::addAddressIndex(const CTxMemPoolEntry& entry, const CCoinsViewCache& view)
{
LOCK(cs);
const CTransaction& tx = entry.GetTx();
std::vector<CMempoolAddressDeltaKey> inserted;
uint256 txhash = tx.GetHash();
for (unsigned int j = 0; j < tx.vin.size(); j++) {
const CTxIn input = tx.vin[j];
const Coin& coin = view.AccessCoin(input.prevout);
const CTxOut &prevout = coin.out;
AddressType address_type{AddressType::UNKNOWN};
uint160 address_bytes;
if (!AddressBytesFromScript(prevout.scriptPubKey, address_type, address_bytes)) {
continue;
}
CMempoolAddressDeltaKey key(address_type, address_bytes, txhash, j, /* tx_spent */ true);
CMempoolAddressDelta delta(entry.GetTime(), prevout.nValue * -1, input.prevout.hash, input.prevout.n);
mapAddress.insert(std::make_pair(key, delta));
inserted.push_back(key);
}
for (unsigned int k = 0; k < tx.vout.size(); k++) {
const CTxOut &out = tx.vout[k];
AddressType address_type{AddressType::UNKNOWN};
uint160 address_bytes;
if (!AddressBytesFromScript(out.scriptPubKey, address_type, address_bytes)) {
continue;
}
CMempoolAddressDeltaKey key(address_type, address_bytes, txhash, k, /* tx_spent */ false);
mapAddress.insert(std::make_pair(key, CMempoolAddressDelta(entry.GetTime(), out.nValue)));
inserted.push_back(key);
}
mapAddressInserted.insert(std::make_pair(txhash, inserted));
}
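// Collect all mempool address deltas matching the given (address, type) pairs.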
bool CTxMemPool::getAddressIndex(const std::vector<std::pair<uint160, AddressType>>& addresses,
std::vector<CMempoolAddressDeltaEntry>& results) const
{
LOCK(cs);
for (const auto& address : addresses) {
addressDeltaMap::const_iterator ait = mapAddress.lower_bound(CMempoolAddressDeltaKey(address.second, address.first));
while (ait != mapAddress.end() && (*ait).first.m_address_bytes == address.first && (*ait).first.m_address_type == address.second) {
results.push_back(*ait);
ait++;
}
}
return true;
}
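// Erase all address index entries that were inserted for the given transaction.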
bool CTxMemPool::removeAddressIndex(const uint256 txhash)
{
LOCK(cs);
addressDeltaMapInserted::iterator it = mapAddressInserted.find(txhash);
if (it != mapAddressInserted.end()) {
std::vector<CMempoolAddressDeltaKey> keys = (*it).second;
for (std::vector<CMempoolAddressDeltaKey>::iterator mit = keys.begin(); mit != keys.end(); mit++) {
mapAddress.erase(*mit);
}
mapAddressInserted.erase(it);
}
return true;
}
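// Record spent index entries for a newly added transaction: for each input, map
// the spent outpoint to the spending txid, input index, value and address of
// the previous output.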
void CTxMemPool::addSpentIndex(const CTxMemPoolEntry& entry, const CCoinsViewCache& view)
{
LOCK(cs);
const CTransaction& tx = entry.GetTx();
std::vector<CSpentIndexKey> inserted;
uint256 txhash = tx.GetHash();
for (unsigned int j = 0; j < tx.vin.size(); j++) {
const CTxIn input = tx.vin[j];
const Coin& coin = view.AccessCoin(input.prevout);
const CTxOut &prevout = coin.out;
AddressType address_type{AddressType::UNKNOWN};
uint160 address_bytes;
if (!AddressBytesFromScript(prevout.scriptPubKey, address_type, address_bytes)) {
continue;
}
CSpentIndexKey key = CSpentIndexKey(input.prevout.hash, input.prevout.n);
CSpentIndexValue value = CSpentIndexValue(txhash, j, -1, prevout.nValue, address_type, address_bytes);
mapSpent.insert(std::make_pair(key, value));
inserted.push_back(key);
}
mapSpentInserted.insert(std::make_pair(txhash, inserted));
}
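// Look up which in-mempool transaction (if any) spends the outpoint described
// by key.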
bool CTxMemPool::getSpentIndex(const CSpentIndexKey& key, CSpentIndexValue& value) const
{
LOCK(cs);
mapSpentIndex::const_iterator it = mapSpent.find(key);
if (it != mapSpent.end()) {
value = it->second;
return true;
}
return false;
}
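// Erase all spent index entries that were inserted for the given transaction.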
bool CTxMemPool::removeSpentIndex(const uint256 txhash)
{
LOCK(cs);
mapSpentIndexInserted::iterator it = mapSpentInserted.find(txhash);
if (it != mapSpentInserted.end()) {
std::vector<CSpentIndexKey> keys = (*it).second;
for (std::vector<CSpentIndexKey>::iterator mit = keys.begin(); mit != keys.end(); mit++) {
mapSpent.erase(*mit);
}
mapSpentInserted.erase(it);
}
return true;
}
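// Index the special-transaction (ProTx) payload of a newly added transaction:
// provider register/update/revoke transactions populate the ProTx maps
// (references, addresses, keys and collaterals), asset unlock transactions
// record their expiry height, and MNHF signals are prioritised.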
void CTxMemPool::addUncheckedProTx(indexed_transaction_set::iterator& newit, const CTransaction& tx)
{
assert(m_dmnman);
if (tx.nType == TRANSACTION_PROVIDER_REGISTER) {
auto proTx = *Assert(GetTxPayload<CProRegTx>(tx));
if (!proTx.collateralOutpoint.hash.IsNull()) {
mapProTxRefs.emplace(tx.GetHash(), proTx.collateralOutpoint.hash);
}
mapProTxAddresses.emplace(proTx.addr, tx.GetHash());
mapProTxPubKeyIDs.emplace(proTx.keyIDOwner, tx.GetHash());
mapProTxBlsPubKeyHashes.emplace(proTx.pubKeyOperator.GetHash(), tx.GetHash());
if (!proTx.collateralOutpoint.hash.IsNull()) {
mapProTxCollaterals.emplace(proTx.collateralOutpoint, tx.GetHash());
} else {
mapProTxCollaterals.emplace(COutPoint(tx.GetHash(), proTx.collateralOutpoint.n), tx.GetHash());
}
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_SERVICE) {
auto proTx = *Assert(GetTxPayload<CProUpServTx>(tx));
mapProTxRefs.emplace(proTx.proTxHash, tx.GetHash());
mapProTxAddresses.emplace(proTx.addr, tx.GetHash());
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REGISTRAR) {
auto proTx = *Assert(GetTxPayload<CProUpRegTx>(tx));
mapProTxRefs.emplace(proTx.proTxHash, tx.GetHash());
mapProTxBlsPubKeyHashes.emplace(proTx.pubKeyOperator.GetHash(), tx.GetHash());
auto dmn = Assert(m_dmnman->GetListAtChainTip().GetMN(proTx.proTxHash));
newit->validForProTxKey = ::SerializeHash(dmn->pdmnState->pubKeyOperator);
if (dmn->pdmnState->pubKeyOperator != proTx.pubKeyOperator) {
newit->isKeyChangeProTx = true;
}
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REVOKE) {
auto proTx = *Assert(GetTxPayload<CProUpRevTx>(tx));
mapProTxRefs.emplace(proTx.proTxHash, tx.GetHash());
auto dmn = Assert(m_dmnman->GetListAtChainTip().GetMN(proTx.proTxHash));
newit->validForProTxKey = ::SerializeHash(dmn->pdmnState->pubKeyOperator);
if (dmn->pdmnState->pubKeyOperator.Get() != CBLSPublicKey()) {
newit->isKeyChangeProTx = true;
}
} else if (tx.nType == TRANSACTION_ASSET_UNLOCK) {
auto assetUnlockTx = *Assert(GetTxPayload<CAssetUnlockPayload>(tx));
mapAssetUnlockExpiry.insert({tx.GetHash(), assetUnlockTx.getHeightToExpiry()});
} else if (tx.nType == TRANSACTION_MNHF_SIGNAL) {
PrioritiseTransaction(tx.GetHash(), 0.1 * COIN);
}
}
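// Remove a single entry from the mempool and update all derived state
// (mapNextTx, vTxHashes, ProTx maps, address and spent indexes, cached inner
// usage and the fee/size totals).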
void CTxMemPool::removeUnchecked(txiter it, MemPoolRemovalReason reason)
{
if (reason != MemPoolRemovalReason::BLOCK) {
// Notify clients that a transaction has been removed from the mempool
// for any reason except being included in a block. Clients interested
// in transactions included in blocks can subscribe to the BlockConnected
// notification.
GetMainSignals().TransactionRemovedFromMempool(it->GetSharedTx(), reason);
}
const uint256 hash = it->GetTx().GetHash();
for (const CTxIn& txin : it->GetTx().vin)
mapNextTx.erase(txin.prevout);
RemoveUnbroadcastTx(hash, true /* add logging because unchecked */ );
if (vTxHashes.size() > 1) {
vTxHashes[it->vTxHashesIdx] = std::move(vTxHashes.back());
vTxHashes[it->vTxHashesIdx].second->vTxHashesIdx = it->vTxHashesIdx;
vTxHashes.pop_back();
if (vTxHashes.size() * 2 < vTxHashes.capacity())
vTxHashes.shrink_to_fit();
} else
vTxHashes.clear();
if (m_dmnman) {
removeUncheckedProTx(it->GetTx());
}
totalTxSize -= it->GetTxSize();
m_total_fee -= it->GetFee();
cachedInnerUsage -= it->DynamicMemoryUsage();
cachedInnerUsage -= memusage::DynamicUsage(it->GetMemPoolParentsConst()) + memusage::DynamicUsage(it->GetMemPoolChildrenConst());
mapTx.erase(it);
nTransactionsUpdated++;
if (minerPolicyEstimator) {minerPolicyEstimator->removeTx(hash, false);}
removeAddressIndex(hash);
removeSpentIndex(hash);
}
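// Undo the ProTx bookkeeping performed in addUncheckedProTx for a transaction
// that is being removed from the mempool.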
void CTxMemPool::removeUncheckedProTx(const CTransaction& tx)
{
auto eraseProTxRef = [&](const uint256& proTxHash, const uint256& txHash) {
auto its = mapProTxRefs.equal_range(proTxHash);
for (auto it = its.first; it != its.second;) {
if (it->second == txHash) {
it = mapProTxRefs.erase(it);
} else {
++it;
}
}
};
if (tx.nType == TRANSACTION_PROVIDER_REGISTER) {
auto proTx = *Assert(GetTxPayload<CProRegTx>(tx));
if (!proTx.collateralOutpoint.IsNull()) {
eraseProTxRef(tx.GetHash(), proTx.collateralOutpoint.hash);
}
mapProTxAddresses.erase(proTx.addr);
mapProTxPubKeyIDs.erase(proTx.keyIDOwner);
mapProTxBlsPubKeyHashes.erase(proTx.pubKeyOperator.GetHash());
mapProTxCollaterals.erase(proTx.collateralOutpoint);
mapProTxCollaterals.erase(COutPoint(tx.GetHash(), proTx.collateralOutpoint.n));
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_SERVICE) {
auto proTx = *Assert(GetTxPayload<CProUpServTx>(tx));
eraseProTxRef(proTx.proTxHash, tx.GetHash());
mapProTxAddresses.erase(proTx.addr);
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REGISTRAR) {
auto proTx = *Assert(GetTxPayload<CProUpRegTx>(tx));
eraseProTxRef(proTx.proTxHash, tx.GetHash());
mapProTxBlsPubKeyHashes.erase(proTx.pubKeyOperator.GetHash());
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REVOKE) {
auto proTx = *Assert(GetTxPayload<CProUpRevTx>(tx));
eraseProTxRef(proTx.proTxHash, tx.GetHash());
} else if (tx.nType == TRANSACTION_ASSET_UNLOCK) {
mapAssetUnlockExpiry.erase(tx.GetHash());
}
}
// Calculates descendants of entry that are not already in setDescendants, and adds to
// setDescendants. Assumes entryit is already a tx in the mempool and CTxMemPoolEntry::m_children
// is correct for tx and all descendants.
// Also assumes that if an entry is in setDescendants already, then all
// in-mempool descendants of it are already in setDescendants as well, so that we
// can save time by not iterating over those entries.
void CTxMemPool::CalculateDescendants(txiter entryit, setEntries& setDescendants) const
{
setEntries stage;
if (setDescendants.count(entryit) == 0) {
stage.insert(entryit);
}
// Traverse down the children of entry, only adding children that are not
// accounted for in setDescendants already (because those children have either
// already been walked, or will be walked in this iteration).
while (!stage.empty()) {
txiter it = *stage.begin();
setDescendants.insert(it);
stage.erase(it);
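// GetMemPoolChildrenConst() hands back references to the child entries; iterator_to
// converts each reference back into a mapTx iterator without another hash lookup.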
const CTxMemPoolEntry::Children& children = it->GetMemPoolChildrenConst();
for (const CTxMemPoolEntry& child : children) {
txiter childiter = mapTx.iterator_to(child);
if (!setDescendants.count(childiter)) {
stage.insert(childiter);
}
}
}
}
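/**
 * Remove origTx and all of its in-mempool descendants. If origTx itself is no longer
 * in the mempool (e.g. after a reorg), any in-pool transactions spending its outputs
 * are collected instead and removed together with their descendants.
 */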
void CTxMemPool::removeRecursive(const CTransaction &origTx, MemPoolRemovalReason reason)
{
// Remove transaction from memory pool
AssertLockHeld(cs);
setEntries txToRemove;
txiter origit = mapTx.find(origTx.GetHash());
if (origit != mapTx.end()) {
txToRemove.insert(origit);
} else {
// When recursively removing but origTx isn't in the mempool
// be sure to remove any children that are in the pool. This can
// happen during chain re-orgs if origTx isn't re-accepted into
// the mempool for any reason.
for (unsigned int i = 0; i < origTx.vout.size(); i++) {
auto it = mapNextTx.find(COutPoint(origTx.GetHash(), i));
if (it == mapNextTx.end())
continue;
txiter nextit = mapTx.find(it->second->GetHash());
assert(nextit != mapTx.end());
txToRemove.insert(nextit);
}
}
setEntries setAllRemoves;
for (txiter it : txToRemove) {
CalculateDescendants(it, setAllRemoves);
}
RemoveStaged(setAllRemoves, false, reason);
}
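/**
 * After a reorg, walk the whole mempool and evict entries that are no longer valid
 * against the new tip: transactions failing CheckFinalTx/CheckSequenceLocks and spends
 * of coinbases that are no longer mature at the new height. Entries whose stored lock
 * points tested stale get them recalculated and stored back.
 */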
void CTxMemPool::removeForReorg(CChainState& active_chainstate, int flags) EXCLUSIVE_LOCKS_REQUIRED(cs_main)
{
// Remove transactions spending coinbases that are now immature, and transactions that are no longer final
AssertLockHeld(cs);
setEntries txToRemove;
for (indexed_transaction_set::const_iterator it = mapTx.begin(); it != mapTx.end(); it++) {
const CTransaction& tx = it->GetTx();
LockPoints lp = it->GetLockPoints();
bool validLP = TestLockPointValidity(active_chainstate.m_chain, &lp);
CCoinsViewMemPool view_mempool(&active_chainstate.CoinsTip(), *this);
if (!CheckFinalTx(active_chainstate.m_chain.Tip(), tx, flags)
|| !CheckSequenceLocks(active_chainstate.m_chain.Tip(), view_mempool, tx, flags, &lp, validLP)) {
// Note: if CheckSequenceLocks fails, the LockPoints may still be invalid,
// so it's critical that we remove the tx and not depend on the LockPoints.
txToRemove.insert(it);
} else if (it->GetSpendsCoinbase()) {
for (const CTxIn& txin : tx.vin) {
indexed_transaction_set::const_iterator it2 = mapTx.find(txin.prevout.hash);
if (it2 != mapTx.end())
continue;
const Coin &coin = active_chainstate.CoinsTip().AccessCoin(txin.prevout);
if (m_check_ratio != 0) assert(!coin.IsSpent());
unsigned int nMemPoolHeight = active_chainstate.m_chain.Tip()->nHeight + 1;
if (coin.IsSpent() || (coin.IsCoinBase() && ((signed long)nMemPoolHeight) - coin.nHeight < COINBASE_MATURITY)) {
txToRemove.insert(it);
break;
}
}
}
if (!validLP) {
mapTx.modify(it, update_lock_points(lp));
}
}
setEntries setAllRemoves;
for (txiter it : txToRemove) {
CalculateDescendants(it, setAllRemoves);
}
RemoveStaged(setAllRemoves, false, MemPoolRemovalReason::REORG);
}
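/**
 * Remove any mempool transactions (and their descendants) that spend the same inputs
 * as tx. For a conflicting ProRegTx, every mempool transaction referencing that ProTx
 * is evicted as well via mapProTxRefs.
 */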
void CTxMemPool::removeConflicts(const CTransaction &tx)
{
// Remove transactions which depend on inputs of tx, recursively
AssertLockHeld(cs);
for (const CTxIn &txin : tx.vin) {
auto it = mapNextTx.find(txin.prevout);
if (it != mapNextTx.end()) {
const CTransaction &txConflict = *it->second;
if (txConflict != tx)
{
if (txConflict.nType == TRANSACTION_PROVIDER_REGISTER) {
// Remove all other protxes which refer to this protx
// NOTE: Can't use equal_range here as every call to removeRecursive might invalidate iterators
while (true) {
auto itPro = mapProTxRefs.find(txConflict.GetHash());
if (itPro == mapProTxRefs.end()) {
break;
}
auto txit = mapTx.find(itPro->second);
if (txit != mapTx.end()) {
ClearPrioritisation(txit->GetTx().GetHash());
removeRecursive(txit->GetTx(), MemPoolRemovalReason::CONFLICT);
} else {
mapProTxRefs.erase(itPro);
}
}
}
ClearPrioritisation(txConflict.GetHash());
removeRecursive(txConflict, MemPoolRemovalReason::CONFLICT);
}
}
}
}
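/**
 * Evict a mempool ProTx that already claims the given key, unless it is tx itself.
 * The two overloads cover ECDSA owner key IDs and BLS operator keys respectively.
 */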
void CTxMemPool::removeProTxPubKeyConflicts(const CTransaction &tx, const CKeyID &keyId)
{
if (mapProTxPubKeyIDs.count(keyId)) {
uint256 conflictHash = mapProTxPubKeyIDs[keyId];
if (conflictHash != tx.GetHash() && mapTx.count(conflictHash)) {
removeRecursive(mapTx.find(conflictHash)->GetTx(), MemPoolRemovalReason::CONFLICT);
}
}
}
void CTxMemPool::removeProTxPubKeyConflicts(const CTransaction &tx, const CBLSLazyPublicKey &pubKey)
{
if (mapProTxBlsPubKeyHashes.count(pubKey.GetHash())) {
uint256 conflictHash = mapProTxBlsPubKeyHashes[pubKey.GetHash()];
if (conflictHash != tx.GetHash() && mapTx.count(conflictHash)) {
removeRecursive(mapTx.find(conflictHash)->GetTx(), MemPoolRemovalReason::CONFLICT);
}
}
}
void CTxMemPool::removeProTxCollateralConflicts(const CTransaction &tx, const COutPoint &collateralOutpoint)
{
if (mapProTxCollaterals.count(collateralOutpoint)) {
uint256 conflictHash = mapProTxCollaterals[collateralOutpoint];
if (conflictHash != tx.GetHash() && mapTx.count(conflictHash)) {
removeRecursive(mapTx.find(conflictHash)->GetTx(), MemPoolRemovalReason::CONFLICT);
}
}
}
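/**
 * If tx spends an outpoint that serves as masternode collateral, either for an unmined
 * ProRegTx still in the mempool or for a mined ProRegTx in the deterministic MN list,
 * evict every mempool transaction that references the affected ProTx.
 */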
void CTxMemPool::removeProTxSpentCollateralConflicts(const CTransaction &tx)
{
assert(m_dmnman);
// Remove TXs that refer to a MN for which the collateral was spent
auto removeSpentCollateralConflict = [&](const uint256& proTxHash) {
// Can't use equal_range here as every call to removeRecursive might invalidate iterators
AssertLockHeld(cs);
while (true) {
auto it = mapProTxRefs.find(proTxHash);
if (it == mapProTxRefs.end()) {
break;
}
auto conflictIt = mapTx.find(it->second);
if (conflictIt != mapTx.end()) {
removeRecursive(conflictIt->GetTx(), MemPoolRemovalReason::CONFLICT);
} else {
// Should not happen as we track referencing TXs in addUnchecked/removeUnchecked.
// But let's be on the safe side and avoid running into an endless loop...
LogPrint(BCLog::MEMPOOL, "%s: ERROR: found invalid TX ref in mapProTxRefs, proTxHash=%s, txHash=%s\n", __func__, proTxHash.ToString(), it->second.ToString());
mapProTxRefs.erase(it);
}
}
};
auto mnList = m_dmnman->GetListAtChainTip();
for (const auto& in : tx.vin) {
auto collateralIt = mapProTxCollaterals.find(in.prevout);
if (collateralIt != mapProTxCollaterals.end()) {
// These are not yet mined ProRegTxs
removeSpentCollateralConflict(collateralIt->second);
}
auto dmn = mnList.GetMNByCollateral(in.prevout);
if (dmn) {
// These are updates referring to a mined ProRegTx
removeSpentCollateralConflict(dmn->proTxHash);
}
}
}
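/**
 * Evict mempool transactions referencing proTxHash whose validForProTxKey no longer
 * matches newKeyHash, i.e. entries that were validated against the previous operator key.
 */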
void CTxMemPool::removeProTxKeyChangedConflicts(const CTransaction &tx, const uint256& proTxHash, const uint256& newKeyHash)
{
std::set<uint256> conflictingTxs;
for (auto its = mapProTxRefs.equal_range(proTxHash); its.first != its.second; ++its.first) {
auto txit = mapTx.find(its.first->second);
if (txit == mapTx.end()) {
continue;
}
if (txit->validForProTxKey != newKeyHash) {
conflictingTxs.emplace(txit->GetTx().GetHash());
}
}
for (const auto& txHash : conflictingTxs) {
auto& tx = mapTx.find(txHash)->GetTx();
removeRecursive(tx, MemPoolRemovalReason::CONFLICT);
}
}
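/**
 * Dispatch ProTx conflict handling for tx: spent-collateral conflicts are always
 * checked, followed by type-specific conflicts on service address, owner/operator
 * keys, collateral outpoint and operator key rotation.
 */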
void CTxMemPool::removeProTxConflicts(const CTransaction &tx)
{
removeProTxSpentCollateralConflicts(tx);
if (tx.nType == TRANSACTION_PROVIDER_REGISTER) {
const auto opt_proTx = GetTxPayload<CProRegTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return;
}
auto& proTx = *opt_proTx;
if (mapProTxAddresses.count(proTx.addr)) {
uint256 conflictHash = mapProTxAddresses[proTx.addr];
if (conflictHash != tx.GetHash() && mapTx.count(conflictHash)) {
removeRecursive(mapTx.find(conflictHash)->GetTx(), MemPoolRemovalReason::CONFLICT);
}
}
removeProTxPubKeyConflicts(tx, proTx.keyIDOwner);
removeProTxPubKeyConflicts(tx, proTx.pubKeyOperator);
if (!proTx.collateralOutpoint.hash.IsNull()) {
removeProTxCollateralConflicts(tx, proTx.collateralOutpoint);
} else {
removeProTxCollateralConflicts(tx, COutPoint(tx.GetHash(), proTx.collateralOutpoint.n));
}
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_SERVICE) {
const auto opt_proTx = GetTxPayload<CProUpServTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return;
}
if (mapProTxAddresses.count(opt_proTx->addr)) {
uint256 conflictHash = mapProTxAddresses[opt_proTx->addr];
if (conflictHash != tx.GetHash() && mapTx.count(conflictHash)) {
removeRecursive(mapTx.find(conflictHash)->GetTx(), MemPoolRemovalReason::CONFLICT);
}
}
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REGISTRAR) {
const auto opt_proTx = GetTxPayload<CProUpRegTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return;
}
removeProTxPubKeyConflicts(tx, opt_proTx->pubKeyOperator);
removeProTxKeyChangedConflicts(tx, opt_proTx->proTxHash, ::SerializeHash(opt_proTx->pubKeyOperator));
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REVOKE) {
const auto opt_proTx = GetTxPayload<CProUpRevTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return;
}
removeProTxKeyChangedConflicts(tx, opt_proTx->proTxHash, ::SerializeHash(CBLSPublicKey()));
}
}
/**
* Called when a block is connected. Removes the block's transactions from the mempool and updates the miner fee estimator.
*/
void CTxMemPool::removeForBlock(const std::vector<CTransactionRef>& vtx, unsigned int nBlockHeight)
{
AssertLockHeld(cs);
std::vector<const CTxMemPoolEntry*> entries;
for (const auto& tx : vtx)
{
uint256 hash = tx->GetHash();
indexed_transaction_set::iterator i = mapTx.find(hash);
if (i != mapTx.end())
entries.push_back(&*i);
}
// Before the txs in the new block have been removed from the mempool, update policy estimates
if (minerPolicyEstimator) { minerPolicyEstimator->processBlock(nBlockHeight, entries); }
for (const auto& tx : vtx)
{
txiter it = mapTx.find(tx->GetHash());
if (it != mapTx.end()) {
setEntries stage;
stage.insert(it);
RemoveStaged(stage, true, MemPoolRemovalReason::BLOCK);
}
removeConflicts(*tx);
if (m_dmnman) {
removeProTxConflicts(*tx);
}
ClearPrioritisation(tx->GetHash());
}
lastRollingFeeUpdate = GetTime();
blockSinceLastRollingFeeBump = true;
}
/**
* Called when the chain length increases. Removes expired asset-unlock transactions from the mempool.
*/
void CTxMemPool::removeExpiredAssetUnlock(int nBlockHeight)
{
AssertLockHeld(cs);
// Items to be removed must first be collected into a separate list,
// because each call to `removeRecursive` mutates mapAssetUnlockExpiry
std::vector<CTransactionRef> entries;
for (const auto& item: mapAssetUnlockExpiry) {
if (item.second < nBlockHeight) {
entries.push_back(get(item.first));
}
}
for (const auto& tx : entries) {
removeRecursive(*tx, MemPoolRemovalReason::EXPIRY);
}
}
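// Internal helper that resets all mempool state; clear() below wraps it with LOCK(cs).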
void CTxMemPool::_clear()
{
vTxHashes.clear();
mapTx.clear();
mapNextTx.clear();
mapProTxAddresses.clear();
mapProTxPubKeyIDs.clear();
totalTxSize = 0;
m_total_fee = 0;
cachedInnerUsage = 0;
lastRollingFeeUpdate = GetTime();
blockSinceLastRollingFeeBump = false;
rollingMinimumFeeRate = 0;
++nTransactionsUpdated;
}
void CTxMemPool::clear()
{
LOCK(cs);
_clear();
}
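// Helper for check(): re-runs the consensus input checks for one mempool entry against
// a scratch coins view, then marks its outputs as spendable for entries checked later.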
static void CheckInputsAndUpdateCoins(const CTransaction& tx, CCoinsViewCache& mempoolDuplicate, const int64_t spendheight)
{
TxValidationState dummy_state; // Not used. CheckTxInputs() should always pass
CAmount txfee = 0;
bool fCheckResult = tx.IsCoinBase() || Consensus::CheckTxInputs(tx, dummy_state, mempoolDuplicate, spendheight, txfee);
assert(fCheckResult);
UpdateCoins(tx, mempoolDuplicate, std::numeric_limits<int>::max());
}
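/**
 * Sanity-check mempool invariants: per-entry size, fee and memory-usage accounting,
 * parent/child link consistency, mapNextTx back-references and ancestor state. Expensive,
 * so it only runs for roughly 1 in m_check_ratio calls (never when the ratio is 0).
 */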
void CTxMemPool::check(CChainState& active_chainstate) const
{
if (m_check_ratio == 0) return;
if (GetRand(m_check_ratio) >= 1) return;
AssertLockHeld(::cs_main);
LOCK(cs);
LogPrint(BCLog::MEMPOOL, "Checking mempool with %u transactions and %u inputs\n", (unsigned int)mapTx.size(), (unsigned int)mapNextTx.size());
uint64_t checkTotal = 0;
CAmount check_total_fee{0};
uint64_t innerUsage = 0;
CCoinsViewCache& active_coins_tip = active_chainstate.CoinsTip();
CCoinsViewCache mempoolDuplicate(const_cast<CCoinsViewCache*>(&active_coins_tip));
const int64_t spendheight = active_chainstate.m_chain.Height() + 1;
std::list<const CTxMemPoolEntry*> waitingOnDependants;
for (indexed_transaction_set::const_iterator it = mapTx.begin(); it != mapTx.end(); it++) {
unsigned int i = 0;
checkTotal += it->GetTxSize();
check_total_fee += it->GetFee();
innerUsage += it->DynamicMemoryUsage();
const CTransaction& tx = it->GetTx();
innerUsage += memusage::DynamicUsage(it->GetMemPoolParentsConst()) + memusage::DynamicUsage(it->GetMemPoolChildrenConst());
bool fDependsWait = false;
CTxMemPoolEntry::Parents setParentCheck;
for (const CTxIn &txin : tx.vin) {
// Check that every mempool transaction's inputs refer to available coins, or other mempool transactions.
indexed_transaction_set::const_iterator it2 = mapTx.find(txin.prevout.hash);
if (it2 != mapTx.end()) {
const CTransaction& tx2 = it2->GetTx();
assert(tx2.vout.size() > txin.prevout.n && !tx2.vout[txin.prevout.n].IsNull());
fDependsWait = true;
setParentCheck.insert(*it2);
} else {
assert(active_coins_tip.HaveCoin(txin.prevout));
}
// Check whether its inputs are marked in mapNextTx.
auto it3 = mapNextTx.find(txin.prevout);
assert(it3 != mapNextTx.end());
assert(it3->first == &txin.prevout);
assert(it3->second == &tx);
i++;
}
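// Entries are considered equal if they refer to the same transaction (compared by txid).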
auto comp = [](const CTxMemPoolEntry& a, const CTxMemPoolEntry& b) -> bool {
return a.GetTx().GetHash() == b.GetTx().GetHash();
};
assert(setParentCheck.size() == it->GetMemPoolParentsConst().size());
assert(std::equal(setParentCheck.begin(), setParentCheck.end(), it->GetMemPoolParentsConst().begin(), comp));
// Verify ancestor state is correct.
setEntries setAncestors;
uint64_t nNoLimit = std::numeric_limits<uint64_t>::max();
std::string dummy;
CalculateMemPoolAncestors(*it, setAncestors, nNoLimit, nNoLimit, nNoLimit, nNoLimit, dummy);
uint64_t nCountCheck = setAncestors.size() + 1;
uint64_t nSizeCheck = it->GetTxSize();
CAmount nFeesCheck = it->GetModifiedFee();
unsigned int nSigOpCheck = it->GetSigOpCount();
for (txiter ancestorIt : setAncestors) {
nSizeCheck += ancestorIt->GetTxSize();
nFeesCheck += ancestorIt->GetModifiedFee();
nSigOpCheck += ancestorIt->GetSigOpCount();
}
assert(it->GetCountWithAncestors() == nCountCheck);
assert(it->GetSizeWithAncestors() == nSizeCheck);
assert(it->GetSigOpCountWithAncestors() == nSigOpCheck);
assert(it->GetModFeesWithAncestors() == nFeesCheck);
// Check children against mapNextTx
CTxMemPoolEntry::Children setChildrenCheck;
auto iter = mapNextTx.lower_bound(COutPoint(it->GetTx().GetHash(), 0));
uint64_t child_sizes = 0;
for (; iter != mapNextTx.end() && iter->first->hash == it->GetTx().GetHash(); ++iter) {
txiter childit = mapTx.find(iter->second->GetHash());
assert(childit != mapTx.end()); // mapNextTx points to in-mempool transactions
if (setChildrenCheck.insert(*childit).second) {
child_sizes += childit->GetTxSize();
}
}
assert(setChildrenCheck.size() == it->GetMemPoolChildrenConst().size());
assert(std::equal(setChildrenCheck.begin(), setChildrenCheck.end(), it->GetMemPoolChildrenConst().begin(), comp));
// Also check that the size-with-descendants is at least the transaction's own size plus the
// total size of its immediate children. This is just a sanity check, not definitive proof that
// the descendant-size bookkeeping is correct.
assert(it->GetSizeWithDescendants() >= child_sizes + it->GetTxSize());
if (fDependsWait)
waitingOnDependants.push_back(&(*it));
else {
CheckInputsAndUpdateCoins(tx, mempoolDuplicate, spendheight);
}
}
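// Second pass: re-check transactions that spend outputs of other in-mempool transactions once
// their parents have been applied to mempoolDuplicate. stepsSinceLastRemove bounds the loop so
// that a missing parent triggers the assert below instead of looping forever.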
unsigned int stepsSinceLastRemove = 0;
while (!waitingOnDependants.empty()) {
const CTxMemPoolEntry* entry = waitingOnDependants.front();
waitingOnDependants.pop_front();
if (!mempoolDuplicate.HaveInputs(entry->GetTx())) {
waitingOnDependants.push_back(entry);
stepsSinceLastRemove++;
assert(stepsSinceLastRemove < waitingOnDependants.size());
} else {
CheckInputsAndUpdateCoins(entry->GetTx(), mempoolDuplicate, spendheight);
stepsSinceLastRemove = 0;
}
}
for (auto it = mapNextTx.cbegin(); it != mapNextTx.cend(); it++) {
uint256 hash = it->second->GetHash();
indexed_transaction_set::const_iterator it2 = mapTx.find(hash);
assert(it2 != mapTx.end());
const CTransaction& tx = it2->GetTx();
assert(&tx == it->second);
}
assert(totalTxSize == checkTotal);
assert(m_total_fee == check_total_fee);
assert(innerUsage == cachedInnerUsage);
}
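// Returns true if hasha should sort before hashb: fewer in-mempool ancestors first, ties broken
// by score. A transaction that is not in the mempool always sorts after one that is.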
bool CTxMemPool::CompareDepthAndScore(const uint256& hasha, const uint256& hashb)
{
LOCK(cs);
indexed_transaction_set::const_iterator i = mapTx.find(hasha);
if (i == mapTx.end()) return false;
indexed_transaction_set::const_iterator j = mapTx.find(hashb);
if (j == mapTx.end()) return true;
uint64_t counta = i->GetCountWithAncestors();
uint64_t countb = j->GetCountWithAncestors();
if (counta == countb) {
return CompareTxMemPoolEntryByScore()(*i, *j);
}
return counta < countb;
}
namespace {
class DepthAndScoreComparator
{
public:
bool operator()(const CTxMemPool::indexed_transaction_set::const_iterator& a, const CTxMemPool::indexed_transaction_set::const_iterator& b)
{
uint64_t counta = a->GetCountWithAncestors();
uint64_t countb = b->GetCountWithAncestors();
if (counta == countb) {
return CompareTxMemPoolEntryByScore()(*a, *b);
}
return counta < countb;
}
};
} // namespace
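// Collect iterators to all mempool entries, sorted by ancestor count (ascending) with ties broken by score.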
std::vector<CTxMemPool::indexed_transaction_set::const_iterator> CTxMemPool::GetSortedDepthAndScore() const
{
std::vector<indexed_transaction_set::const_iterator> iters;
AssertLockHeld(cs);
iters.reserve(mapTx.size());
for (indexed_transaction_set::iterator mi = mapTx.begin(); mi != mapTx.end(); ++mi) {
iters.push_back(mi);
}
std::sort(iters.begin(), iters.end(), DepthAndScoreComparator());
return iters;
}
void CTxMemPool::queryHashes(std::vector<uint256>& vtxid) const
{
LOCK(cs);
auto iters = GetSortedDepthAndScore();
vtxid.clear();
vtxid.reserve(mapTx.size());
for (auto it : iters) {
vtxid.push_back(it->GetTx().GetHash());
}
}
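// Build a TxMempoolInfo snapshot for an entry; the last field is the fee delta applied via
// PrioritiseTransaction (modified fee minus base fee).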
static TxMempoolInfo GetInfo(CTxMemPool::indexed_transaction_set::const_iterator it) {
return TxMempoolInfo{it->GetSharedTx(), it->GetTime(), it->GetFee(), it->GetTxSize(), it->GetModifiedFee() - it->GetFee()};
}
std::vector<TxMempoolInfo> CTxMemPool::infoAll() const
{
LOCK(cs);
auto iters = GetSortedDepthAndScore();
std::vector<TxMempoolInfo> ret;
ret.reserve(mapTx.size());
for (auto it : iters) {
ret.push_back(GetInfo(it));
}
return ret;
}
CTransactionRef CTxMemPool::get(const uint256& hash) const
{
LOCK(cs);
indexed_transaction_set::const_iterator i = mapTx.find(hash);
if (i == mapTx.end())
return nullptr;
return i->GetSharedTx();
}
TxMempoolInfo CTxMemPool::info(const uint256& hash) const
{
LOCK(cs);
indexed_transaction_set::const_iterator i = mapTx.find(hash);
if (i == mapTx.end())
return TxMempoolInfo();
return GetInfo(i);
}
bool CTxMemPool::existsProviderTxConflict(const CTransaction &tx) const {
assert(m_dmnman);
LOCK(cs);
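// Returns true if the mempool already contains a ProTx flagged as a key change for the given proTxHash.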
auto hasKeyChangeInMempool = [&](const uint256& proTxHash) {
AssertLockHeld(cs);
for (auto its = mapProTxRefs.equal_range(proTxHash); its.first != its.second; ++its.first) {
auto txit = mapTx.find(its.first->second);
if (txit == mapTx.end()) {
continue;
}
if (txit->isKeyChangeProTx) {
return true;
}
}
return false;
};
if (tx.nType == TRANSACTION_PROVIDER_REGISTER) {
const auto opt_proTx = GetTxPayload<CProRegTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return true; // i.e. can't decode payload == conflict
}
auto& proTx = *opt_proTx;
if (mapProTxAddresses.count(proTx.addr) || mapProTxPubKeyIDs.count(proTx.keyIDOwner) || mapProTxBlsPubKeyHashes.count(proTx.pubKeyOperator.GetHash()))
return true;
if (!proTx.collateralOutpoint.hash.IsNull()) {
if (mapProTxCollaterals.count(proTx.collateralOutpoint)) {
// there is another ProRegTx that refers to the same collateral
return true;
}
if (mapNextTx.count(proTx.collateralOutpoint)) {
// there is another tx that spends the collateral
return true;
}
}
return false;
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_SERVICE) {
const auto opt_proTx = GetTxPayload<CProUpServTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return true; // i.e. can't decode payload == conflict
}
auto it = mapProTxAddresses.find(opt_proTx->addr);
return it != mapProTxAddresses.end() && it->second != opt_proTx->proTxHash;
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REGISTRAR) {
const auto opt_proTx = GetTxPayload<CProUpRegTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return true; // i.e. can't decode payload == conflict
}
auto& proTx = *opt_proTx;
// this method should only be called with validated ProTxs
auto dmn = m_dmnman->GetListAtChainTip().GetMN(proTx.proTxHash);
if (!dmn) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Masternode is not in the list, proTxHash: %s\n", __func__, proTx.proTxHash.ToString());
return true; // i.e. failed to find validated ProTx == conflict
}
// only allow one operator key change in the mempool
if (dmn->pdmnState->pubKeyOperator != proTx.pubKeyOperator) {
if (hasKeyChangeInMempool(proTx.proTxHash)) {
return true;
}
}
auto it = mapProTxBlsPubKeyHashes.find(proTx.pubKeyOperator.GetHash());
return it != mapProTxBlsPubKeyHashes.end() && it->second != proTx.proTxHash;
} else if (tx.nType == TRANSACTION_PROVIDER_UPDATE_REVOKE) {
const auto opt_proTx = GetTxPayload<CProUpRevTx>(tx);
if (!opt_proTx) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Invalid transaction payload, tx: %s\n", __func__, tx.GetHash().ToString());
return true; // i.e. can't decode payload == conflict
}
auto& proTx = *opt_proTx;
// this method should only be called with validated ProTxs
auto dmn = m_dmnman->GetListAtChainTip().GetMN(proTx.proTxHash);
if (!dmn) {
LogPrint(BCLog::MEMPOOL, "%s: ERROR: Masternode is not in the list, proTxHash: %s\n", __func__, proTx.proTxHash.ToString());
return true; // i.e. failed to find validated ProTx == conflict
}
// only allow one operator key change in the mempool
if (dmn->pdmnState->pubKeyOperator.Get() != CBLSPublicKey()) {
if (hasKeyChangeInMempool(proTx.proTxHash)) {
return true;
}
}
}
return false;
}
void CTxMemPool::PrioritiseTransaction(const uint256& hash, const CAmount& nFeeDelta)
{
{
LOCK(cs);
CAmount &delta = mapDeltas[hash];
delta += nFeeDelta;
txiter it = mapTx.find(hash);
if (it != mapTx.end()) {
mapTx.modify(it, update_fee_delta(delta));
// Now update all ancestors' modified fees with descendants
setEntries setAncestors;
uint64_t nNoLimit = std::numeric_limits<uint64_t>::max();
std::string dummy;
CalculateMemPoolAncestors(*it, setAncestors, nNoLimit, nNoLimit, nNoLimit, nNoLimit, dummy, false);
for (txiter ancestorIt : setAncestors) {
mapTx.modify(ancestorIt, update_descendant_state(0, nFeeDelta, 0));
}
// Now update all descendants' modified fees with ancestors
setEntries setDescendants;
CalculateDescendants(it, setDescendants);
setDescendants.erase(it);
for (txiter descendantIt : setDescendants) {
mapTx.modify(descendantIt, update_ancestor_state(0, nFeeDelta, 0, 0));
}
++nTransactionsUpdated;
}
}
LogPrint(BCLog::MEMPOOL, "PrioritiseTransaction: %s feerate += %s\n", hash.ToString(), FormatMoney(nFeeDelta));
}
void CTxMemPool::ApplyDelta(const uint256& hash, CAmount &nFeeDelta) const
{
AssertLockHeld(cs);
std::map<uint256, CAmount>::const_iterator pos = mapDeltas.find(hash);
if (pos == mapDeltas.end())
return;
const CAmount &delta = pos->second;
nFeeDelta += delta;
}
void CTxMemPool::ClearPrioritisation(const uint256& hash)
{
AssertLockHeld(cs);
mapDeltas.erase(hash);
}
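// Return the in-mempool transaction spending prevout, or nullptr if no mempool transaction spends it.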
const CTransaction* CTxMemPool::GetConflictTx(const COutPoint& prevout) const
{
const auto it = mapNextTx.find(prevout);
return it == mapNextTx.end() ? nullptr : it->second;
}
std::optional<CTxMemPool::txiter> CTxMemPool::GetIter(const uint256& txid) const
{
auto it = mapTx.find(txid);
if (it != mapTx.end()) return it;
return std::nullopt;
}
CTxMemPool::setEntries CTxMemPool::GetIterSet(const std::set<uint256>& hashes) const
{
CTxMemPool::setEntries ret;
for (const auto& h : hashes) {
const auto mi = GetIter(h);
if (mi) ret.insert(*mi);
}
return ret;
}
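// Returns true if none of tx's inputs spend an output of a transaction that is currently in the mempool.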
bool CTxMemPool::HasNoInputsOf(const CTransaction &tx) const
{
for (unsigned int i = 0; i < tx.vin.size(); i++)
if (exists(tx.vin[i].prevout.hash))
return false;
return true;
}
CCoinsViewMemPool::CCoinsViewMemPool(CCoinsView* baseIn, const CTxMemPool& mempoolIn) : CCoinsViewBacked(baseIn), mempool(mempoolIn) { }
bool CCoinsViewMemPool::GetCoin(const COutPoint &outpoint, Coin &coin) const {
// Check to see if the inputs are made available by another tx in the package.
// These Coins would not be available in the underlying CoinsView.
if (auto it = m_temp_added.find(outpoint); it != m_temp_added.end()) {
coin = it->second;
return true;
}
// If an entry in the mempool exists, always return that one, as it's guaranteed to never
// conflict with the underlying cache, and it cannot have pruned entries (as it contains full
// transactions). First checking the underlying cache risks returning a pruned entry instead.
CTransactionRef ptx = mempool.get(outpoint.hash);
if (ptx) {
if (outpoint.n < ptx->vout.size()) {
coin = Coin(ptx->vout[outpoint.n], MEMPOOL_HEIGHT, false);
return true;
} else {
return false;
}
}
return base->GetCoin(outpoint, coin);
}
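// Record every output of tx as a temporarily available coin so that later transactions in the
// same package can spend it through GetCoin().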
void CCoinsViewMemPool::PackageAddTransaction(const CTransactionRef& tx)
{
for (unsigned int n = 0; n < tx->vout.size(); ++n) {
m_temp_added.emplace(COutPoint(tx->GetHash(), n), Coin(tx->vout[n], MEMPOOL_HEIGHT, false));
}
}
size_t CTxMemPool::DynamicMemoryUsage() const {
LOCK(cs);
// Estimate the overhead of mapTx to be 12 pointers + an allocation, as no exact formula for boost::multi_index_container is implemented.
return memusage::MallocUsage(sizeof(CTxMemPoolEntry) + 12 * sizeof(void*)) * mapTx.size() + memusage::DynamicUsage(mapNextTx) + memusage::DynamicUsage(mapDeltas) + memusage::DynamicUsage(vTxHashes) + cachedInnerUsage;
}
void CTxMemPool::RemoveUnbroadcastTx(const uint256& txid, const bool unchecked) {
LOCK(cs);
if (m_unbroadcast_txids.erase(txid))
{
LogPrint(BCLog::MEMPOOL, "Removed %i from set of unbroadcast txns%s\n", txid.GetHex(), (unchecked ? " before confirmation that txn was sent out" : ""));
}
}
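// Remove a pre-computed set of entries, first fixing up ancestor/descendant bookkeeping via
// UpdateForRemoveFromMempool().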
void CTxMemPool::RemoveStaged(setEntries &stage, bool updateDescendants, MemPoolRemovalReason reason) {
AssertLockHeld(cs);
UpdateForRemoveFromMempool(stage, updateDescendants);
for (txiter it : stage) {
removeUnchecked(it, reason);
}
}
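// Remove transactions whose entry time is older than `time` (together with their descendants),
// skipping InstantSend-locked transactions. Returns the number of entries removed.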
int CTxMemPool::Expire(std::chrono::seconds time)
{
AssertLockHeld(cs);
indexed_transaction_set::index<entry_time>::type::iterator it = mapTx.get<entry_time>().begin();
setEntries toremove;
while (it != mapTx.get<entry_time>().end() && it->GetTime() < time) {
// InstantSend-locked transactions do not expire; they stay in the mempool until mined and sufficiently confirmed
if (llmq::quorumInstantSendManager->IsLocked(it->GetTx().GetHash())) {
it++;
continue;
}
toremove.insert(mapTx.project<0>(it));
it++;
}
setEntries stage;
for (txiter removeit : toremove) {
CalculateDescendants(removeit, stage);
}
RemoveStaged(stage, false, MemPoolRemovalReason::EXPIRY);
return stage.size();
}
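// Convenience overload: compute the entry's in-mempool ancestors with effectively
// unlimited package limits, then defer to the main addUnchecked() implementation.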
void CTxMemPool::addUnchecked(const CTxMemPoolEntry &entry, bool validFeeEstimate)
{
setEntries setAncestors;
uint64_t nNoLimit = std::numeric_limits<uint64_t>::max();
std::string dummy;
CalculateMemPoolAncestors(entry, setAncestors, nNoLimit, nNoLimit, nNoLimit, nNoLimit, dummy);
return addUnchecked(entry, setAncestors, validFeeEstimate);
}
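// UpdateChild()/UpdateParent() maintain the per-entry child and parent sets. The
// default-constructed dummy container `s` is only passed to
// memusage::IncrementalDynamicUsage() to price a single set element, so that
// cachedInnerUsage can be adjusted whenever a link is added or erased.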
void CTxMemPool::UpdateChild(txiter entry, txiter child, bool add)
{
AssertLockHeld(cs);
CTxMemPoolEntry::Children s;
if (add && entry->GetMemPoolChildren().insert(*child).second) {
cachedInnerUsage += memusage::IncrementalDynamicUsage(s);
} else if (!add && entry->GetMemPoolChildren().erase(*child)) {
cachedInnerUsage -= memusage::IncrementalDynamicUsage(s);
}
}
void CTxMemPool::UpdateParent(txiter entry, txiter parent, bool add)
{
AssertLockHeld(cs);
CTxMemPoolEntry::Parents s;
if (add && entry->GetMemPoolParents().insert(*parent).second) {
cachedInnerUsage += memusage::IncrementalDynamicUsage(s);
} else if (!add && entry->GetMemPoolParents().erase(*parent)) {
cachedInnerUsage -= memusage::IncrementalDynamicUsage(s);
}
}
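// The rolling minimum fee rate only decays once a block has been connected since
// the last bump (blockSinceLastRollingFeeBump). It is then halved once per
// half-life of elapsed time, where the half-life shrinks to 1/2 or 1/4 of
// ROLLING_FEE_HALFLIFE while the mempool is below 1/2 or 1/4 of its size limit,
// and the rate is reset to zero once it falls under half the incremental relay fee.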
CFeeRate CTxMemPool::GetMinFee(size_t sizelimit) const {
LOCK(cs);
if (!blockSinceLastRollingFeeBump || rollingMinimumFeeRate == 0)
return CFeeRate(llround(rollingMinimumFeeRate));
int64_t time = GetTime();
if (time > lastRollingFeeUpdate + 10) {
double halflife = ROLLING_FEE_HALFLIFE;
if (DynamicMemoryUsage() < sizelimit / 4)
halflife /= 4;
else if (DynamicMemoryUsage() < sizelimit / 2)
halflife /= 2;
rollingMinimumFeeRate = rollingMinimumFeeRate / pow(2.0, (time - lastRollingFeeUpdate) / halflife);
lastRollingFeeUpdate = time;
if (rollingMinimumFeeRate < (double)incrementalRelayFee.GetFeePerK() / 2) {
rollingMinimumFeeRate = 0;
return CFeeRate(0);
}
}
return std::max(CFeeRate(llround(rollingMinimumFeeRate)), incrementalRelayFee);
}
void CTxMemPool::trackPackageRemoved(const CFeeRate& rate) {
AssertLockHeld(cs);
if (rate.GetFeePerK() > rollingMinimumFeeRate) {
rollingMinimumFeeRate = rate.GetFeePerK();
blockSinceLastRollingFeeBump = false;
}
}
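// Evict entries starting from the worst descendant score, removing each one
// together with its in-mempool descendants, until dynamic memory usage is back
// under `sizelimit`. If pvNoSpendsRemaining is supplied, outpoints that are no
// longer spent by any remaining mempool transaction are reported to the caller.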
void CTxMemPool::TrimToSize(size_t sizelimit, std::vector<COutPoint>* pvNoSpendsRemaining) {
AssertLockHeld(cs);
unsigned nTxnRemoved = 0;
CFeeRate maxFeeRateRemoved(0);
while (!mapTx.empty() && DynamicMemoryUsage() > sizelimit) {
indexed_transaction_set::index<descendant_score>::type::iterator it = mapTx.get<descendant_score>().begin();
// We set the new mempool min fee to the feerate of the removed set, plus the
// "minimum reasonable fee rate" (i.e. some value under which we consider a txn
// to have 0 fee). This way, we don't allow a txn to enter the mempool with a
// feerate equal to that of txns which were removed with no block in between.
CFeeRate removed(it->GetModFeesWithDescendants(), it->GetSizeWithDescendants());
removed += incrementalRelayFee;
trackPackageRemoved(removed);
maxFeeRateRemoved = std::max(maxFeeRateRemoved, removed);
setEntries stage;
CalculateDescendants(mapTx.project<0>(it), stage);
nTxnRemoved += stage.size();
std::vector<CTransaction> txn;
if (pvNoSpendsRemaining) {
txn.reserve(stage.size());
for (txiter iter : stage)
txn.push_back(iter->GetTx());
}
RemoveStaged(stage, false, MemPoolRemovalReason::SIZELIMIT);
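// After the staged removal, record every input of the evicted transactions whose
// funding transaction is no longer in the mempool; callers typically use this
// list to uncache the corresponding coins.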
if (pvNoSpendsRemaining) {
for (const CTransaction& tx : txn) {
for (const CTxIn& txin : tx.vin) {
if (exists(txin.prevout.hash)) continue;
pvNoSpendsRemaining->push_back(txin.prevout);
}
}
}
}
if (maxFeeRateRemoved > CFeeRate(0)) {
LogPrint(BCLog::MEMPOOL, "Removed %u txn, rolling minimum fee bumped to %s\n", nTxnRemoved, maxFeeRateRemoved.ToString());
}
}
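// Walk the in-mempool ancestor graph of `entry` iteratively: push parents onto a
// stack until transactions with no in-pool parents are reached, and return the
// largest descendant count observed among those parentless ancestors.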
uint64_t CTxMemPool::CalculateDescendantMaximum(txiter entry) const {
// find parent with highest descendant count
std::vector<txiter> candidates;
setEntries counted;
candidates.push_back(entry);
uint64_t maximum = 0;
while (candidates.size()) {
txiter candidate = candidates.back();
candidates.pop_back();
if (!counted.insert(candidate).second) continue;
const CTxMemPoolEntry::Parents& parents = candidate->GetMemPoolParentsConst();
if (parents.size() == 0) {
maximum = std::max(maximum, candidate->GetCountWithDescendants());
} else {
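            // Walk this candidate's in-pool parents. The parent set holds CTxMemPoolEntry
            // references, and mapTx.iterator_to() turns such a reference back into a mapTx
            // iterator without a txid lookup (the mapLinks replacement from upstream #19478).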
            for (const CTxMemPoolEntry& i : parents) {
                candidates.push_back(mapTx.iterator_to(i));
            }
        }
    }
    return maximum;
}
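
// For a transaction already in the pool, report its cached in-mempool ancestor count
// (which includes the transaction itself) and the maximum descendant count found among
// its in-pool ancestors, as computed by CalculateDescendantMaximum above. Both outputs
// are left at zero when txid is not in the mempool.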
void CTxMemPool::GetTransactionAncestry(const uint256& txid, size_t& ancestors, size_t& descendants) const {
    LOCK(cs);
    auto it = mapTx.find(txid);
    ancestors = descendants = 0;
    if (it != mapTx.end()) {
        ancestors = it->GetCountWithAncestors();
        descendants = CalculateDescendantMaximum(it);
    }
}
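
// A minimal caller sketch (hypothetical, not part of this file): checking a
// transaction's unconfirmed chain depth against configurable limits before spending it.
//
//   size_t ancestors = 0, descendants = 0;
//   mempool.GetTransactionAncestry(wtx.GetHash(), ancestors, descendants);
//   bool within_limits = ancestors <= limit_ancestor_count &&
//                        descendants <= limit_descendant_count;
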
bool CTxMemPool::IsLoaded() const
{
    LOCK(cs);
    return m_is_loaded;
}
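
// Assumption (not shown in this file): m_is_loaded is flipped via SetIsLoaded(true)
// once the persisted mempool has been read at startup, so IsLoaded() tells callers
// whether the pool's contents are complete enough to rely on.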
void CTxMemPool::SetIsLoaded(bool loaded)
{
    LOCK(cs);
    m_is_loaded = loaded;
}