dash/src/util/strencodings.cpp

575 lines
18 KiB
C++
Raw Normal View History

// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
merge bitcoin#14555: Move util files to directory (script modified to account for Dash backports, doesn't account for rebasing) ------------- BEGIN SCRIPT --------------- mkdir -p src/util git mv src/util.h src/util/system.h git mv src/util.cpp src/util/system.cpp git mv src/utilmemory.h src/util/memory.h git mv src/utilmoneystr.h src/util/moneystr.h git mv src/utilmoneystr.cpp src/util/moneystr.cpp git mv src/utilstrencodings.h src/util/strencodings.h git mv src/utilstrencodings.cpp src/util/strencodings.cpp git mv src/utiltime.h src/util/time.h git mv src/utiltime.cpp src/util/time.cpp git mv src/utilasmap.h src/util/asmap.h git mv src/utilasmap.cpp src/util/asmap.cpp git mv src/utilstring.h src/util/string.h git mv src/utilstring.cpp src/util/string.cpp gsed -i 's/<util\.h>/<util\/system\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utilmemory\.h>/<util\/memory\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utilmoneystr\.h>/<util\/moneystr\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utilstrencodings\.h>/<util\/strencodings\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utiltime\.h>/<util\/time\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utilasmap\.h>/<util\/asmap\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/<utilstring\.h>/<util\/string\.h>/g' $(git ls-files 'src/*.h' 'src/*.cpp') gsed -i 's/BITCOIN_UTIL_H/BITCOIN_UTIL_SYSTEM_H/g' src/util/system.h gsed -i 's/BITCOIN_UTILMEMORY_H/BITCOIN_UTIL_MEMORY_H/g' src/util/memory.h gsed -i 's/BITCOIN_UTILMONEYSTR_H/BITCOIN_UTIL_MONEYSTR_H/g' src/util/moneystr.h gsed -i 's/BITCOIN_UTILSTRENCODINGS_H/BITCOIN_UTIL_STRENCODINGS_H/g' src/util/strencodings.h gsed -i 's/BITCOIN_UTILTIME_H/BITCOIN_UTIL_TIME_H/g' src/util/time.h gsed -i 's/BITCOIN_UTILASMAP_H/BITCOIN_UTIL_ASMAP_H/g' src/util/asmap.h gsed -i 's/BITCOIN_UTILSTRING_H/BITCOIN_UTIL_STRING_H/g' src/util/string.h gsed -i 's/ util\.\(h\|cpp\)/ util\/system\.\1/g' src/Makefile.am gsed -i 
's/utilmemory\.\(h\|cpp\)/util\/memory\.\1/g' src/Makefile.am gsed -i 's/utilmoneystr\.\(h\|cpp\)/util\/moneystr\.\1/g' src/Makefile.am gsed -i 's/utilstrencodings\.\(h\|cpp\)/util\/strencodings\.\1/g' src/Makefile.am gsed -i 's/utiltime\.\(h\|cpp\)/util\/time\.\1/g' src/Makefile.am gsed -i 's/utilasmap\.\(h\|cpp\)/util\/asmap\.\1/g' src/Makefile.am gsed -i 's/utilstring\.\(h\|cpp\)/util\/string\.\1/g' src/Makefile.am gsed -i 's/-> util ->/-> util\/system ->/' test/lint/lint-circular-dependencies.sh gsed -i 's/src\/util\.cpp/src\/util\/system\.cpp/g' test/lint/lint-format-strings.py test/lint/lint-locale-dependence.sh gsed -i 's/src\/utilmoneystr\.cpp/src\/util\/moneystr\.cpp/g' test/lint/lint-locale-dependence.sh gsed -i 's/src\/utilstrencodings\.\(h\|cpp\)/src\/util\/strencodings\.\1/g' test/lint/lint-locale-dependence.sh ------------- END SCRIPT ---------------
2021-06-27 08:33:13 +02:00
#include <util/strencodings.h>
#include <util/string.h>
Backport 11651 (#3358) * scripted-diff: Replace #include "" with #include <> (ryanofsky) -BEGIN VERIFY SCRIPT- for f in \ src/*.cpp \ src/*.h \ src/bench/*.cpp \ src/bench/*.h \ src/compat/*.cpp \ src/compat/*.h \ src/consensus/*.cpp \ src/consensus/*.h \ src/crypto/*.cpp \ src/crypto/*.h \ src/crypto/ctaes/*.h \ src/policy/*.cpp \ src/policy/*.h \ src/primitives/*.cpp \ src/primitives/*.h \ src/qt/*.cpp \ src/qt/*.h \ src/qt/test/*.cpp \ src/qt/test/*.h \ src/rpc/*.cpp \ src/rpc/*.h \ src/script/*.cpp \ src/script/*.h \ src/support/*.cpp \ src/support/*.h \ src/support/allocators/*.h \ src/test/*.cpp \ src/test/*.h \ src/wallet/*.cpp \ src/wallet/*.h \ src/wallet/test/*.cpp \ src/wallet/test/*.h \ src/zmq/*.cpp \ src/zmq/*.h do base=${f%/*}/ relbase=${base#src/} sed -i "s:#include \"\(.*\)\"\(.*\):if test -e \$base'\\1'; then echo \"#include <\"\$relbase\"\\1>\\2\"; else echo \"#include <\\1>\\2\"; fi:e" $f done -END VERIFY SCRIPT- Signed-off-by: Pasta <pasta@dashboost.org> * scripted-diff: Replace #include "" with #include <> (Dash Specific) -BEGIN VERIFY SCRIPT- for f in \ src/bls/*.cpp \ src/bls/*.h \ src/evo/*.cpp \ src/evo/*.h \ src/governance/*.cpp \ src/governance/*.h \ src/llmq/*.cpp \ src/llmq/*.h \ src/masternode/*.cpp \ src/masternode/*.h \ src/privatesend/*.cpp \ src/privatesend/*.h do base=${f%/*}/ relbase=${base#src/} sed -i "s:#include \"\(.*\)\"\(.*\):if test -e \$base'\\1'; then echo \"#include <\"\$relbase\"\\1>\\2\"; else echo \"#include <\\1>\\2\"; fi:e" $f done -END VERIFY SCRIPT- Signed-off-by: Pasta <pasta@dashboost.org> * build: Remove -I for everything but project root Remove -I from build system for everything but the project root, and built-in dependencies. Signed-off-by: Pasta <pasta@dashboost.org> # Conflicts: # src/Makefile.test.include * qt: refactor: Use absolute include paths in .ui files * qt: refactor: Changes to make include paths absolute This makes all include paths in the GUI absolute. 
Many changes are involved as every single source file in src/qt/ assumes to be able to use relative includes. Signed-off-by: Pasta <pasta@dashboost.org> # Conflicts: # src/qt/dash.cpp # src/qt/optionsmodel.cpp # src/qt/test/rpcnestedtests.cpp * test: refactor: Use absolute include paths for test data files * Recommend #include<> syntax in developer notes * refactor: Include obj/build.h instead of build.h * END BACKPORT #11651 Remove trailing whitespace causing travis failure * fix backport 11651 Signed-off-by: Pasta <pasta@dashboost.org> * More of 11651 * fix blockchain.cpp Signed-off-by: pasta <pasta@dashboost.org> * Add missing "qt/" in includes * Add missing "test/" in includes * Fix trailing whitespaces Co-authored-by: Wladimir J. van der Laan <laanwj@gmail.com> Co-authored-by: Russell Yanofsky <russ@yanofsky.org> Co-authored-by: MeshCollider <dobsonsa68@gmail.com> Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>
2020-03-19 23:46:56 +01:00
#include <tinyformat.h>
#include <algorithm>
Merge bitcoin/bitcoin#24852: util: optimize HexStr 5e61532e72c1021fda9c7b213bd9cf397cb3a802 util: optimizes HexStr (Martin Leitner-Ankerl) 4e2b99f72a90b956f3050095abed4949aff9b516 bench: Adds a benchmark for HexStr (Martin Leitner-Ankerl) 67c8411c37b483caa2fe3f7f4f40b68ed2a9bcf7 test: Adds a test for HexStr that checks all 256 bytes (Martin Leitner-Ankerl) Pull request description: In my benchmark, this rewrite improves runtime 27% (g++) to 46% (clang++) for the benchmark `HexStrBench`: g++ 11.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.94 | 1,061,381,310.36 | 0.7% | 12.00 | 3.01 | 3.990 | 1.00 | 0.0% | 0.01 | `HexStrBench` master | 0.68 | 1,465,366,544.25 | 1.7% | 6.00 | 2.16 | 2.778 | 1.00 | 0.0% | 0.01 | `HexStrBench` branch clang++ 13.0.1 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.80 | 1,244,713,415.92 | 0.9% | 10.00 | 2.56 | 3.913 | 0.50 | 0.0% | 0.01 | `HexStrBench` master | 0.43 | 2,324,188,940.72 | 0.2% | 4.00 | 1.37 | 2.914 | 0.25 | 0.0% | 0.01 | `HexStrBench` branch Note that the idea for this change comes from denis2342 in #23364. This is a rewrite so no unaligned accesses occur. Also, the lookup table is now calculated at compile time, which hopefully makes the code a bit easier to review. ACKs for top commit: laanwj: Code review ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 aureleoules: tACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802. theStack: ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 🚤 Tree-SHA512: 40b53d5908332473ef24918d3a80ad1292b60566c02585fa548eb4c3189754971be5a70325f4968fce6d714df898b52d9357aba14d4753a8c70e6ffd273a2319
2022-05-04 20:19:51 +02:00
#include <array>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
/** All ASCII letters (lower case first, then upper case) followed by the decimal digits. */
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/**
 * Per-rule whitelists of characters, indexed by the `rule` argument of
 * SanitizeString(). Every entry is CHARS_ALPHA_NUM plus a rule-specific set of
 * punctuation; the trailing comment on each entry names the rule it serves.
 */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};
/**
 * Return a copy of `str` containing only the characters whitelisted for `rule`.
 *
 * @param str   Text to filter.
 * @param rule  Index into SAFE_CHARS selecting the whitelist; it is not
 *              range-checked here.
 * @return The characters of `str` that appear in SAFE_CHARS[rule], in their
 *         original order.
 */
std::string SanitizeString(std::string_view str, int rule)
{
    const auto is_allowed = [rule](char ch) {
        return SAFE_CHARS[rule].find(ch) != std::string::npos;
    };

    std::string kept;
    for (const char ch : str) {
        if (!is_allowed(ch)) continue;
        kept.push_back(ch);
    }
    return kept;
}
/**
 * Lookup table from a byte value to the value of the hexadecimal digit it
 * represents. Entries for '0'-'9' hold 0-9, entries for 'A'-'F' and 'a'-'f'
 * hold 10-15, and every other entry holds -1.
 */
const signed char p_util_hexdigit[256] =
{ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,
-1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, };
/**
 * Look up the numeric value of a hexadecimal digit character.
 *
 * @return The p_util_hexdigit entry for `c`: 0-15 for a hex digit, -1 otherwise.
 */
signed char HexDigit(char c)
{
    // Go through unsigned char so that negative char values index the table correctly.
    const auto table_index = static_cast<unsigned char>(c);
    return p_util_hexdigit[table_index];
}
/**
 * Check whether `str` is a non-empty, even-length sequence of hex digits,
 * i.e. a textual encoding of one or more whole bytes.
 */
bool IsHex(std::string_view str)
{
    // Whole bytes only: reject the empty string and odd lengths up front.
    if (str.empty() || str.size() % 2 != 0) {
        return false;
    }
    for (const char ch : str) {
        if (HexDigit(ch) < 0) {
            return false;
        }
    }
    return true;
}
/**
 * Check whether `str` is a hexadecimal number: an optional lower-case "0x"
 * prefix followed by at least one hex digit and nothing else.
 */
bool IsHexNumber(std::string_view str)
{
    const bool has_prefix = str.size() >= 2 && str[0] == '0' && str[1] == 'x';
    const std::string_view digits = has_prefix ? str.substr(2) : str;

    // An empty string and a bare "0x" both leave no digits and are rejected.
    return !digits.empty() &&
           std::all_of(digits.begin(), digits.end(), [](char ch) { return HexDigit(ch) >= 0; });
}
/**
 * Convert a hex string into bytes.
 *
 * Characters for which IsSpace() is true are skipped when they occur between
 * byte pairs. Parsing stops silently at the first pair that is not two hex
 * digits; a single trailing character is ignored.
 *
 * @tparam Byte  Element type of the result (instantiated below for std::byte
 *               and uint8_t).
 * @return The bytes decoded before parsing stopped.
 */
template <typename Byte>
std::vector<Byte> ParseHex(std::string_view str)
{
    std::vector<Byte> bytes;
    size_t pos = 0;
    // A byte needs two characters, so stop as soon as fewer than two remain.
    while (pos + 1 < str.size()) {
        if (IsSpace(str[pos])) {
            ++pos;
            continue;
        }
        const auto high_nibble = HexDigit(str[pos]);
        const auto low_nibble = HexDigit(str[pos + 1]);
        pos += 2;
        if (high_nibble < 0 || low_nibble < 0) break;
        bytes.push_back(Byte(high_nibble << 4) | Byte(low_nibble));
    }
    return bytes;
}
template std::vector<std::byte> ParseHex(std::string_view);
template std::vector<uint8_t> ParseHex(std::string_view);
/**
 * Split a "host:port" style string into its host and port parts.
 *
 * The last ':' is treated as the port separator when it is the first
 * character, when it follows a bracketed "[...]" host, or when it is the only
 * ':' in the string. The port is only split off if the text after the
 * separator parses with ParseUInt16().
 *
 * @param in       Input string (taken by value; modified locally).
 * @param portOut  Receives the port; left untouched when no port is split off.
 * @param hostOut  Receives the host, with one pair of enclosing brackets removed.
 */
void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut)
{
    const size_t last_colon = in.rfind(':');
    if (last_colon != std::string::npos) {
        // When last_colon is 0, in[0] is ':' so the second operand is never evaluated.
        const bool bracketed = in[0] == '[' && in[last_colon - 1] == ']';
        const bool has_earlier_colon = last_colon != 0 && in.rfind(':', last_colon - 1) != std::string::npos;
        if (last_colon == 0 || bracketed || !has_earlier_colon) {
            uint16_t parsed_port;
            if (ParseUInt16(in.substr(last_colon + 1), &parsed_port)) {
                in = in.substr(0, last_colon);
                portOut = parsed_port;
            }
        }
    }

    const bool enclosed = !in.empty() && in.front() == '[' && in.back() == ']';
    if (enclosed) {
        hostOut = in.substr(1, in.size() - 2);
    } else {
        hostOut = in;
    }
}
/**
 * Encode a byte span as base64 text, padded with '=' to a multiple of four
 * characters.
 */
std::string EncodeBase64(Span<const unsigned char> input)
{
    static const char* const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string encoded;
    // Every started group of 3 input bytes yields 4 output characters.
    encoded.reserve(((input.size() + 2) / 3) * 4);
    ConvertBits<8, 6, true>([&](int v) { encoded += alphabet[v]; }, input.begin(), input.end());

    // Fill the final quartet with padding characters.
    const size_t padding = (4 - encoded.size() % 4) % 4;
    encoded.append(padding, '=');
    return encoded;
}
/**
 * Decode a NUL-terminated base64 string.
 *
 * @param p           NUL-terminated input.
 * @param pf_invalid  Written unconditionally: true when the input is not valid
 *                    base64, false otherwise. Must not be null.
 * @return The bytes decoded from the leading run of base64 characters; this is
 *         returned even when *pf_invalid is set.
 */
std::vector<unsigned char> DecodeBase64(const char* p, bool* pf_invalid)
{
    // Byte value -> 6-bit base64 value, or -1 for bytes outside the alphabet.
    static const int8_t decode64_table[256]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
        29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    const char* const input_begin = p;

    // Translate the leading run of alphabet characters into 6-bit symbols.
    std::vector<uint8_t> symbols;
    symbols.reserve(strlen(p));
    for (; *p != 0; ++p) {
        const int symbol = decode64_table[(unsigned char)*p];
        if (symbol == -1) break;
        symbols.push_back(uint8_t(symbol));
    }

    // Regroup the 6-bit symbols into bytes; ConvertBits' result seeds the validity flag.
    std::vector<unsigned char> decoded;
    decoded.reserve((symbols.size() * 3) / 4);
    bool ok = ConvertBits<6, 8, false>([&](unsigned char c) { decoded.push_back(c); }, symbols.begin(), symbols.end());

    // Whatever follows the symbols must be nothing but '=' padding.
    const char* const padding_begin = p;
    if (ok) {
        while (*p == '=') ++p;
        ok = (*p == 0);
    }
    // Total length must be a multiple of 4, with fewer than 4 padding characters.
    ok = ok && (p - input_begin) % 4 == 0 && p - padding_begin < 4;
    *pf_invalid = !ok;
    return decoded;
}
std::string DecodeBase64(const std::string& str, bool* pf_invalid)
{
Merge #17753: util: Don't allow Base32/64-decoding or ParseMoney(…) on strings with embedded NUL characters. Add tests. 137c80d579502e329964d7d1028a9507d4667774 tests: Add tests for decoding/parsing of base32, base64 and money strings containing NUL characters (practicalswift) a6fc26da55dea3b76bd89fbbca24ded170238674 util: Don't allow DecodeBase32(...) of strings with embedded NUL characters (practicalswift) 93cc18b0f6fa5fa8144079a4f51904d8b3087e94 util: Don't allow DecodeBase64(...) of strings with embedded NUL characters (practicalswift) ccc53e43c5464058171d6291da861a88184b230e util: Don't allow ParseMoney(...) of strings with embedded NUL characters (practicalswift) Pull request description: Don't allow Base32/64-decoding or `ParseMoney(…)` on strings with embedded `NUL` characters. Add tests. Added tests before: ``` $ src/test/test_bitcoin Running 385 test cases... test/base32_tests.cpp(31): error: in "base32_tests/base32_testvectors": check failure == true has failed [false != true] test/base64_tests.cpp(31): error: in "base64_tests/base64_testvectors": check failure == true has failed [false != true] test/util_tests.cpp(1074): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("\0-1", 3), ret) has failed test/util_tests.cpp(1076): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("1\0", 2), ret) has failed *** 4 failures are detected in the test module "Bitcoin Core Test Suite" ``` Added tests after: ``` $ src/test/test_bitcoin Running 385 test cases... *** No errors detected ``` ACKs for top commit: laanwj: Code review ACK 137c80d579502e329964d7d1028a9507d4667774 Tree-SHA512: 9486a0d32b4cf686bf5a47a0778338ac571fa39c66ad6d6d6cede58ec798e87bb50a2f9b7fd79ecd1fef1ba284e4073c1b430110967073ff87bdbbde7cada447
2019-12-16 16:15:51 +01:00
if (!ValidAsCString(str)) {
*pf_invalid = true;
Merge #17753: util: Don't allow Base32/64-decoding or ParseMoney(…) on strings with embedded NUL characters. Add tests. 137c80d579502e329964d7d1028a9507d4667774 tests: Add tests for decoding/parsing of base32, base64 and money strings containing NUL characters (practicalswift) a6fc26da55dea3b76bd89fbbca24ded170238674 util: Don't allow DecodeBase32(...) of strings with embedded NUL characters (practicalswift) 93cc18b0f6fa5fa8144079a4f51904d8b3087e94 util: Don't allow DecodeBase64(...) of strings with embedded NUL characters (practicalswift) ccc53e43c5464058171d6291da861a88184b230e util: Don't allow ParseMoney(...) of strings with embedded NUL characters (practicalswift) Pull request description: Don't allow Base32/64-decoding or `ParseMoney(…)` on strings with embedded `NUL` characters. Add tests. Added tests before: ``` $ src/test/test_bitcoin Running 385 test cases... test/base32_tests.cpp(31): error: in "base32_tests/base32_testvectors": check failure == true has failed [false != true] test/base64_tests.cpp(31): error: in "base64_tests/base64_testvectors": check failure == true has failed [false != true] test/util_tests.cpp(1074): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("\0-1", 3), ret) has failed test/util_tests.cpp(1076): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("1\0", 2), ret) has failed *** 4 failures are detected in the test module "Bitcoin Core Test Suite" ``` Added tests after: ``` $ src/test/test_bitcoin Running 385 test cases... *** No errors detected ``` ACKs for top commit: laanwj: Code review ACK 137c80d579502e329964d7d1028a9507d4667774 Tree-SHA512: 9486a0d32b4cf686bf5a47a0778338ac571fa39c66ad6d6d6cede58ec798e87bb50a2f9b7fd79ecd1fef1ba284e4073c1b430110967073ff87bdbbde7cada447
2019-12-16 16:15:51 +01:00
return {};
}
std::vector<unsigned char> vchRet = DecodeBase64(str.c_str(), pf_invalid);
return std::string((const char*)vchRet.data(), vchRet.size());
}
/**
 * Encode a byte span as lower-case base32 text.
 *
 * @param input  Bytes to encode.
 * @param pad    When true, append '=' until the length is a multiple of eight.
 */
std::string EncodeBase32(Span<const unsigned char> input, bool pad)
{
    static const char* const alphabet = "abcdefghijklmnopqrstuvwxyz234567";

    std::string encoded;
    // Every started group of 5 input bytes yields 8 output characters.
    encoded.reserve(((input.size() + 4) / 5) * 8);
    ConvertBits<8, 5, true>([&](int v) { encoded += alphabet[v]; }, input.begin(), input.end());

    if (pad) {
        const size_t padding = (8 - encoded.size() % 8) % 8;
        encoded.append(padding, '=');
    }
    return encoded;
}
/** Convenience overload: base32-encode the bytes of `str` (see the Span overload for `pad`). */
std::string EncodeBase32(const std::string& str, bool pad)
{
    const auto bytes = MakeUCharSpan(str);
    return EncodeBase32(bytes, pad);
}
/**
 * Decode a NUL-terminated base32 string. Upper- and lower-case letters are
 * both accepted (the table maps them to the same values).
 *
 * @param p           NUL-terminated input.
 * @param pf_invalid  Written unconditionally: true when the input is not valid
 *                    base32, false otherwise. Must not be null.
 * @return The bytes decoded from the leading run of base32 characters; this is
 *         returned even when *pf_invalid is set.
 */
std::vector<unsigned char> DecodeBase32(const char* p, bool* pf_invalid)
{
    // Byte value -> 5-bit base32 value, or -1 for bytes outside the alphabet.
    static const int8_t decode32_table[256]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    const char* const input_begin = p;

    // Translate the leading run of alphabet characters into 5-bit symbols.
    std::vector<uint8_t> symbols;
    symbols.reserve(strlen(p));
    for (; *p != 0; ++p) {
        const int symbol = decode32_table[(unsigned char)*p];
        if (symbol == -1) break;
        symbols.push_back(uint8_t(symbol));
    }

    // Regroup the 5-bit symbols into bytes; ConvertBits' result seeds the validity flag.
    std::vector<unsigned char> decoded;
    decoded.reserve((symbols.size() * 5) / 8);
    bool ok = ConvertBits<5, 8, false>([&](unsigned char c) { decoded.push_back(c); }, symbols.begin(), symbols.end());

    // Whatever follows the symbols must be nothing but '=' padding.
    const char* const padding_begin = p;
    if (ok) {
        while (*p == '=') ++p;
        ok = (*p == 0);
    }
    // Total length must be a multiple of 8, with fewer than 8 padding characters.
    ok = ok && (p - input_begin) % 8 == 0 && p - padding_begin < 8;
    *pf_invalid = !ok;
    return decoded;
}
std::string DecodeBase32(const std::string& str, bool* pf_invalid)
{
Merge #17753: util: Don't allow Base32/64-decoding or ParseMoney(…) on strings with embedded NUL characters. Add tests. 137c80d579502e329964d7d1028a9507d4667774 tests: Add tests for decoding/parsing of base32, base64 and money strings containing NUL characters (practicalswift) a6fc26da55dea3b76bd89fbbca24ded170238674 util: Don't allow DecodeBase32(...) of strings with embedded NUL characters (practicalswift) 93cc18b0f6fa5fa8144079a4f51904d8b3087e94 util: Don't allow DecodeBase64(...) of strings with embedded NUL characters (practicalswift) ccc53e43c5464058171d6291da861a88184b230e util: Don't allow ParseMoney(...) of strings with embedded NUL characters (practicalswift) Pull request description: Don't allow Base32/64-decoding or `ParseMoney(…)` on strings with embedded `NUL` characters. Add tests. Added tests before: ``` $ src/test/test_bitcoin Running 385 test cases... test/base32_tests.cpp(31): error: in "base32_tests/base32_testvectors": check failure == true has failed [false != true] test/base64_tests.cpp(31): error: in "base64_tests/base64_testvectors": check failure == true has failed [false != true] test/util_tests.cpp(1074): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("\0-1", 3), ret) has failed test/util_tests.cpp(1076): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("1\0", 2), ret) has failed *** 4 failures are detected in the test module "Bitcoin Core Test Suite" ``` Added tests after: ``` $ src/test/test_bitcoin Running 385 test cases... *** No errors detected ``` ACKs for top commit: laanwj: Code review ACK 137c80d579502e329964d7d1028a9507d4667774 Tree-SHA512: 9486a0d32b4cf686bf5a47a0778338ac571fa39c66ad6d6d6cede58ec798e87bb50a2f9b7fd79ecd1fef1ba284e4073c1b430110967073ff87bdbbde7cada447
2019-12-16 16:15:51 +01:00
if (!ValidAsCString(str)) {
*pf_invalid = true;
Merge #17753: util: Don't allow Base32/64-decoding or ParseMoney(…) on strings with embedded NUL characters. Add tests. 137c80d579502e329964d7d1028a9507d4667774 tests: Add tests for decoding/parsing of base32, base64 and money strings containing NUL characters (practicalswift) a6fc26da55dea3b76bd89fbbca24ded170238674 util: Don't allow DecodeBase32(...) of strings with embedded NUL characters (practicalswift) 93cc18b0f6fa5fa8144079a4f51904d8b3087e94 util: Don't allow DecodeBase64(...) of strings with embedded NUL characters (practicalswift) ccc53e43c5464058171d6291da861a88184b230e util: Don't allow ParseMoney(...) of strings with embedded NUL characters (practicalswift) Pull request description: Don't allow Base32/64-decoding or `ParseMoney(…)` on strings with embedded `NUL` characters. Add tests. Added tests before: ``` $ src/test/test_bitcoin Running 385 test cases... test/base32_tests.cpp(31): error: in "base32_tests/base32_testvectors": check failure == true has failed [false != true] test/base64_tests.cpp(31): error: in "base64_tests/base64_testvectors": check failure == true has failed [false != true] test/util_tests.cpp(1074): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("\0-1", 3), ret) has failed test/util_tests.cpp(1076): error: in "util_tests/util_ParseMoney": check !ParseMoney(std::string("1\0", 2), ret) has failed *** 4 failures are detected in the test module "Bitcoin Core Test Suite" ``` Added tests after: ``` $ src/test/test_bitcoin Running 385 test cases... *** No errors detected ``` ACKs for top commit: laanwj: Code review ACK 137c80d579502e329964d7d1028a9507d4667774 Tree-SHA512: 9486a0d32b4cf686bf5a47a0778338ac571fa39c66ad6d6d6cede58ec798e87bb50a2f9b7fd79ecd1fef1ba284e4073c1b430110967073ff87bdbbde7cada447
2019-12-16 16:15:51 +01:00
return {};
}
std::vector<unsigned char> vchRet = DecodeBase32(str.c_str(), pf_invalid);
return std::string((const char*)vchRet.data(), vchRet.size());
}
namespace {
/**
 * Shared implementation behind the ParseInt* / ParseUInt* functions.
 *
 * @tparam T   Integral type to parse into.
 * @param str  Text to parse. A single leading '+' is stripped before the text
 *             is handed to ToIntegral<T>().
 * @param out  Optional destination; may be nullptr to only validate.
 * @return true if ToIntegral<T>() produced a value, false otherwise.
 */
template <typename T>
bool ParseIntegral(const std::string& str, T* out)
{
    static_assert(std::is_integral_v<T>);

    const bool leading_plus = !str.empty() && str[0] == '+';
    // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
    // handling leading +/- for backwards compatibility: "+-..." is rejected.
    if (leading_plus && str.length() >= 2 && str[1] == '-') {
        return false;
    }

    const std::optional<T> parsed = ToIntegral<T>(leading_plus ? str.substr(1) : str);
    if (!parsed.has_value()) {
        return false;
    }
    if (out != nullptr) {
        *out = *parsed;
    }
    return true;
}
} // namespace
/** Parse `str` as a 32-bit signed integer via ParseIntegral(); `out` may be nullptr. */
bool ParseInt32(const std::string& str, int32_t* out)
{
    // The integral type is deduced from the pointer's type (int32_t).
    return ParseIntegral(str, out);
}
/** Parse `str` as a 64-bit signed integer via ParseIntegral(); `out` may be nullptr. */
bool ParseInt64(const std::string& str, int64_t* out)
{
    // The integral type is deduced from the pointer's type (int64_t).
    return ParseIntegral(str, out);
}
/** Parse `str` as an 8-bit unsigned integer via ParseIntegral(); `out` may be nullptr. */
bool ParseUInt8(const std::string& str, uint8_t* out)
{
    // The integral type is deduced from the pointer's type (uint8_t).
    return ParseIntegral(str, out);
}
/** Parse `str` as a 16-bit unsigned integer via ParseIntegral(); `out` may be nullptr. */
bool ParseUInt16(const std::string& str, uint16_t* out)
{
    // The integral type is deduced from the pointer's type (uint16_t).
    return ParseIntegral(str, out);
}
/** Parse `str` as a 32-bit unsigned integer via ParseIntegral(); `out` may be nullptr. */
bool ParseUInt32(const std::string& str, uint32_t* out)
{
    // The integral type is deduced from the pointer's type (uint32_t).
    return ParseIntegral(str, out);
}
/** Parse `str` as a 64-bit unsigned integer via ParseIntegral(); `out` may be nullptr. */
bool ParseUInt64(const std::string& str, uint64_t* out)
{
    // The integral type is deduced from the pointer's type (uint64_t).
    return ParseIntegral(str, out);
}
2015-05-31 15:36:44 +02:00
/**
 * Word-wrap `in` so that lines break at spaces once they would exceed `width`
 * columns, indenting every continuation line by `indent` spaces.
 *
 * Newlines already present in `in` are preserved and reset the indentation.
 * A word longer than the remaining width is never split; it is emitted whole.
 *
 * @param in      Text to format.
 * @param width   Maximum line width; must be >= indent (asserted).
 * @param indent  Number of spaces inserted after each break made at a space.
 * @return The wrapped text.
 */
std::string FormatParagraph(const std::string& in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // start of the not-yet-emitted part of `in`
    size_t indented = 0; // columns already used by indentation on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it including its newline (if any).
            out.append(in, ptr, linelen + 1);
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        // Look for the last break opportunity within the remaining width.
        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string::npos) {
                // End of the string, just add it and break
                out.append(in, ptr, std::string::npos);
                break;
            }
        }
        out.append(in, ptr, finalspace - ptr);
        out += '\n';
        if (in[finalspace] == '\n') {
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
/** Upper bound for mantissa.
* 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
* Larger integers cannot consist of arbitrary combinations of 0-9:
*
* 999999999999999999 10^18-1
* 9223372036854775807 (1<<63)-1 (max int64_t)
* 9999999999999999999 10^19-1 (would overflow)
*/
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;
/**
 * Helper function for ParseFixedPoint: fold one decimal digit into the mantissa.
 *
 * Zeros are not applied immediately; they are only counted in
 * `mantissa_tzeros`. When a non-zero digit arrives, the mantissa is scaled by
 * ten once for each pending zero and once for the digit itself, the digit is
 * added, and the pending-zero counter is reset.
 *
 * @return false if scaling would push the mantissa past UPPER_BOUND, true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    // One multiplication per pending zero, plus one for the incoming digit.
    for (int remaining = mantissa_tzeros; remaining >= 0; --remaining) {
        if (mantissa > (UPPER_BOUND / 10LL)) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
/**
 * Parse a decimal number string into a 64-bit fixed-point integer.
 *
 * Accepted syntax, in order: an optional '-', an integer part (a single '0' or
 * a digit run starting with 1-9), an optional '.' followed by at least one
 * digit, and an optional exponent ('e' or 'E', optional sign, at least one
 * digit). Anything left over makes the parse fail.
 *
 * @param val         Text to parse.
 * @param decimals    Number of decimal places the result is scaled by, i.e. the
 *                    result is the parsed value times 10^decimals.
 * @param amount_out  Optional destination for the result; may be null.
 * @return true on success; false on a syntax error, on overflow, or when the
 *         value cannot be represented exactly with `decimals` decimal places.
 */
bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out)
{
int64_t mantissa = 0;
int64_t exponent = 0;
// Zeros seen since the last non-zero mantissa digit; not yet multiplied in.
int mantissa_tzeros = 0;
bool mantissa_sign = false;
bool exponent_sign = false;
int ptr = 0;
int end = val.size();
// Number of digits consumed after the decimal point.
int point_ofs = 0;
if (ptr < end && val[ptr] == '-') {
mantissa_sign = true;
++ptr;
}
// Integer part.
if (ptr < end)
{
if (val[ptr] == '0') {
/* pass single 0 */
++ptr;
} else if (val[ptr] >= '1' && val[ptr] <= '9') {
while (ptr < end && IsDigit(val[ptr])) {
if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
return false; /* overflow */
++ptr;
}
} else return false; /* missing expected digit */
} else return false; /* empty string or loose '-' */
// Optional fractional part.
if (ptr < end && val[ptr] == '.')
{
++ptr;
if (ptr < end && IsDigit(val[ptr]))
{
while (ptr < end && IsDigit(val[ptr])) {
if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
return false; /* overflow */
++ptr;
++point_ofs;
}
} else return false; /* missing expected digit */
}
// Optional exponent.
if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
{
++ptr;
if (ptr < end && val[ptr] == '+')
++ptr;
else if (ptr < end && val[ptr] == '-') {
exponent_sign = true;
++ptr;
}
if (ptr < end && IsDigit(val[ptr])) {
while (ptr < end && IsDigit(val[ptr])) {
if (exponent > (UPPER_BOUND / 10LL))
return false; /* overflow */
exponent = exponent * 10 + val[ptr] - '0';
++ptr;
}
} else return false; /* missing expected digit */
}
if (ptr != end)
return false; /* trailing garbage */
/* finalize exponent */
if (exponent_sign)
exponent = -exponent;
// Account for the fractional digits consumed and for the trailing zeros that
// were counted but never multiplied into the mantissa.
exponent = exponent - point_ofs + mantissa_tzeros;
/* finalize mantissa */
if (mantissa_sign)
mantissa = -mantissa;
/* convert to one 64-bit fixed-point value */
exponent += decimals;
if (exponent < 0)
return false; /* cannot represent values smaller than 10^-decimals */
if (exponent >= 18)
return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
for (int i=0; i < exponent; ++i) {
if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
return false; /* overflow */
mantissa *= 10;
}
if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
return false; /* overflow */
if (amount_out)
*amount_out = mantissa;
return true;
}
std::string ToLower(const std::string& str)
{
std::string r;
for (auto ch : str) r += ToLower(ch);
return r;
}
std::string ToUpper(const std::string& str)
{
std::string r;
for (auto ch : str) r += ToUpper(ch);
return r;
}
/**
 * Return `str` with its first character passed through ToUpper(); the rest is
 * left unchanged. An empty string is returned as-is.
 */
std::string Capitalize(std::string str)
{
    if (!str.empty()) {
        str.front() = ToUpper(str.front());
    }
    return str;
}
Merge bitcoin/bitcoin#24852: util: optimize HexStr 5e61532e72c1021fda9c7b213bd9cf397cb3a802 util: optimizes HexStr (Martin Leitner-Ankerl) 4e2b99f72a90b956f3050095abed4949aff9b516 bench: Adds a benchmark for HexStr (Martin Leitner-Ankerl) 67c8411c37b483caa2fe3f7f4f40b68ed2a9bcf7 test: Adds a test for HexStr that checks all 256 bytes (Martin Leitner-Ankerl) Pull request description: In my benchmark, this rewrite improves runtime 27% (g++) to 46% (clang++) for the benchmark `HexStrBench`: g++ 11.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.94 | 1,061,381,310.36 | 0.7% | 12.00 | 3.01 | 3.990 | 1.00 | 0.0% | 0.01 | `HexStrBench` master | 0.68 | 1,465,366,544.25 | 1.7% | 6.00 | 2.16 | 2.778 | 1.00 | 0.0% | 0.01 | `HexStrBench` branch clang++ 13.0.1 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.80 | 1,244,713,415.92 | 0.9% | 10.00 | 2.56 | 3.913 | 0.50 | 0.0% | 0.01 | `HexStrBench` master | 0.43 | 2,324,188,940.72 | 0.2% | 4.00 | 1.37 | 2.914 | 0.25 | 0.0% | 0.01 | `HexStrBench` branch Note that the idea for this change comes from denis2342 in #23364. This is a rewrite so no unaligned accesses occur. Also, the lookup table is now calculated at compile time, which hopefully makes the code a bit easier to review. ACKs for top commit: laanwj: Code review ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 aureleoules: tACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802. theStack: ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 🚤 Tree-SHA512: 40b53d5908332473ef24918d3a80ad1292b60566c02585fa548eb4c3189754971be5a70325f4968fce6d714df898b52d9357aba14d4753a8c70e6ffd273a2319
2022-05-04 20:19:51 +02:00
namespace {

/** The two hex characters that encode one byte: high nibble first, then low nibble. */
using ByteAsHex = std::array<char, 2>;

/**
 * Build a lookup table that maps each of the 256 possible byte values to its
 * two-character lowercase hexadecimal representation.
 *
 * The function is constexpr, so the table can be produced at compile time.
 *
 * @return Table indexed by byte value; entry[0] encodes the upper four bits,
 *         entry[1] the lower four bits.
 */
constexpr std::array<ByteAsHex, 256> CreateByteToHexMap()
{
    // Only the first 16 characters are indexed; the trailing NUL is never read.
    constexpr char digits[] = "0123456789abcdef";

    std::array<ByteAsHex, 256> table{};
    for (size_t hi = 0; hi < 16; ++hi) {
        for (size_t lo = 0; lo < 16; ++lo) {
            // (hi << 4) | lo enumerates every byte value exactly once.
            const size_t byte = (hi << 4) | lo;
            table[byte][0] = digits[hi];
            table[byte][1] = digits[lo];
        }
    }
    return table;
}

} // namespace
std::string HexStr(const Span<const uint8_t> s)
{
std::string rv(s.size() * 2, '\0');
Merge bitcoin/bitcoin#24852: util: optimize HexStr 5e61532e72c1021fda9c7b213bd9cf397cb3a802 util: optimizes HexStr (Martin Leitner-Ankerl) 4e2b99f72a90b956f3050095abed4949aff9b516 bench: Adds a benchmark for HexStr (Martin Leitner-Ankerl) 67c8411c37b483caa2fe3f7f4f40b68ed2a9bcf7 test: Adds a test for HexStr that checks all 256 bytes (Martin Leitner-Ankerl) Pull request description: In my benchmark, this rewrite improves runtime 27% (g++) to 46% (clang++) for the benchmark `HexStrBench`: g++ 11.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.94 | 1,061,381,310.36 | 0.7% | 12.00 | 3.01 | 3.990 | 1.00 | 0.0% | 0.01 | `HexStrBench` master | 0.68 | 1,465,366,544.25 | 1.7% | 6.00 | 2.16 | 2.778 | 1.00 | 0.0% | 0.01 | `HexStrBench` branch clang++ 13.0.1 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.80 | 1,244,713,415.92 | 0.9% | 10.00 | 2.56 | 3.913 | 0.50 | 0.0% | 0.01 | `HexStrBench` master | 0.43 | 2,324,188,940.72 | 0.2% | 4.00 | 1.37 | 2.914 | 0.25 | 0.0% | 0.01 | `HexStrBench` branch Note that the idea for this change comes from denis2342 in #23364. This is a rewrite so no unaligned accesses occur. Also, the lookup table is now calculated at compile time, which hopefully makes the code a bit easier to review. ACKs for top commit: laanwj: Code review ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 aureleoules: tACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802. theStack: ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 🚤 Tree-SHA512: 40b53d5908332473ef24918d3a80ad1292b60566c02585fa548eb4c3189754971be5a70325f4968fce6d714df898b52d9357aba14d4753a8c70e6ffd273a2319
2022-05-04 20:19:51 +02:00
static constexpr auto byte_to_hex = CreateByteToHexMap();
static_assert(sizeof(byte_to_hex) == 512);
char* it = rv.data();
for (uint8_t v : s) {
Merge bitcoin/bitcoin#24852: util: optimize HexStr 5e61532e72c1021fda9c7b213bd9cf397cb3a802 util: optimizes HexStr (Martin Leitner-Ankerl) 4e2b99f72a90b956f3050095abed4949aff9b516 bench: Adds a benchmark for HexStr (Martin Leitner-Ankerl) 67c8411c37b483caa2fe3f7f4f40b68ed2a9bcf7 test: Adds a test for HexStr that checks all 256 bytes (Martin Leitner-Ankerl) Pull request description: In my benchmark, this rewrite improves runtime 27% (g++) to 46% (clang++) for the benchmark `HexStrBench`: g++ 11.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.94 | 1,061,381,310.36 | 0.7% | 12.00 | 3.01 | 3.990 | 1.00 | 0.0% | 0.01 | `HexStrBench` master | 0.68 | 1,465,366,544.25 | 1.7% | 6.00 | 2.16 | 2.778 | 1.00 | 0.0% | 0.01 | `HexStrBench` branch clang++ 13.0.1 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.80 | 1,244,713,415.92 | 0.9% | 10.00 | 2.56 | 3.913 | 0.50 | 0.0% | 0.01 | `HexStrBench` master | 0.43 | 2,324,188,940.72 | 0.2% | 4.00 | 1.37 | 2.914 | 0.25 | 0.0% | 0.01 | `HexStrBench` branch Note that the idea for this change comes from denis2342 in #23364. This is a rewrite so no unaligned accesses occur. Also, the lookup table is now calculated at compile time, which hopefully makes the code a bit easier to review. ACKs for top commit: laanwj: Code review ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 aureleoules: tACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802. theStack: ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 🚤 Tree-SHA512: 40b53d5908332473ef24918d3a80ad1292b60566c02585fa548eb4c3189754971be5a70325f4968fce6d714df898b52d9357aba14d4753a8c70e6ffd273a2319
2022-05-04 20:19:51 +02:00
std::memcpy(it, byte_to_hex[v].data(), 2);
it += 2;
}
Merge bitcoin/bitcoin#24852: util: optimize HexStr 5e61532e72c1021fda9c7b213bd9cf397cb3a802 util: optimizes HexStr (Martin Leitner-Ankerl) 4e2b99f72a90b956f3050095abed4949aff9b516 bench: Adds a benchmark for HexStr (Martin Leitner-Ankerl) 67c8411c37b483caa2fe3f7f4f40b68ed2a9bcf7 test: Adds a test for HexStr that checks all 256 bytes (Martin Leitner-Ankerl) Pull request description: In my benchmark, this rewrite improves runtime 27% (g++) to 46% (clang++) for the benchmark `HexStrBench`: g++ 11.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.94 | 1,061,381,310.36 | 0.7% | 12.00 | 3.01 | 3.990 | 1.00 | 0.0% | 0.01 | `HexStrBench` master | 0.68 | 1,465,366,544.25 | 1.7% | 6.00 | 2.16 | 2.778 | 1.00 | 0.0% | 0.01 | `HexStrBench` branch clang++ 13.0.1 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.80 | 1,244,713,415.92 | 0.9% | 10.00 | 2.56 | 3.913 | 0.50 | 0.0% | 0.01 | `HexStrBench` master | 0.43 | 2,324,188,940.72 | 0.2% | 4.00 | 1.37 | 2.914 | 0.25 | 0.0% | 0.01 | `HexStrBench` branch Note that the idea for this change comes from denis2342 in #23364. This is a rewrite so no unaligned accesses occur. Also, the lookup table is now calculated at compile time, which hopefully makes the code a bit easier to review. ACKs for top commit: laanwj: Code review ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 aureleoules: tACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802. theStack: ACK 5e61532e72c1021fda9c7b213bd9cf397cb3a802 🚤 Tree-SHA512: 40b53d5908332473ef24918d3a80ad1292b60566c02585fa548eb4c3189754971be5a70325f4968fce6d714df898b52d9357aba14d4753a8c70e6ffd273a2319
2022-05-04 20:19:51 +02:00
assert(it == rv.data() + rv.size());
return rv;
}
std::optional<uint64_t> ParseByteUnits(const std::string& str, ByteUnit default_multiplier)
{
if (str.empty()) {
return std::nullopt;
}
auto multiplier = default_multiplier;
char unit = str.back();
switch (unit) {
case 'k':
multiplier = ByteUnit::k;
break;
case 'K':
multiplier = ByteUnit::K;
break;
case 'm':
multiplier = ByteUnit::m;
break;
case 'M':
multiplier = ByteUnit::M;
break;
case 'g':
multiplier = ByteUnit::g;
break;
case 'G':
multiplier = ByteUnit::G;
break;
case 't':
multiplier = ByteUnit::t;
break;
case 'T':
multiplier = ByteUnit::T;
break;
default:
unit = 0;
break;
}
uint64_t unit_amount = static_cast<uint64_t>(multiplier);
auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
return std::nullopt;
}
return *parsed_num * unit_amount;
}