dash/test/lint/run-lint-format-strings.py

309 lines
13 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
#
# Copyright (c) 2018 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Lint format strings: This program checks that the number of arguments passed
# to a variadic format string function matches the number of format specifiers
# in the format string.
import argparse
import re
import sys
from functools import partial
from multiprocessing import Pool
FALSE_POSITIVES = [
2020-07-29 15:29:43 +02:00
("src/batchedlogger.h", "strprintf(fmt, args...)"),
("src/dbwrapper.cpp", "vsnprintf(p, limit - p, format, backup_ap)"),
("src/index/base.cpp", "FatalError(const char* fmt, const Args&... args)"),
Merge #13033: Build txindex in parallel with validation 9b2704777c [doc] Include txindex changes in the release notes. (Jim Posen) ed77dd6b30 [test] Simple unit test for TxIndex. (Jim Posen) 6d772a3d44 [rpc] Public interfaces to GetTransaction block until synced. (Jim Posen) a03f804f2a [index] Move disk IO logic from GetTransaction to TxIndex::FindTx. (Jim Posen) e0a3b80033 [validation] Replace tx index code in validation code with TxIndex. (Jim Posen) 8181db88f6 [init] Initialize and start TxIndex in init code. (Jim Posen) f90c3a62f5 [index] TxIndex method to wait until caught up. (Jim Posen) 70d510d93c [index] Allow TxIndex sync thread to be interrupted. (Jim Posen) 94b4f8bbb9 [index] TxIndex initial sync thread. (Jim Posen) 34d68bf3a3 [index] Create new TxIndex class. (Jim Posen) c88bcec93f [db] Migration for txindex data to new, separate database. (Jim Posen) 0cb8303241 [db] Create separate database for txindex. (Jim Posen) Pull request description: I'm re-opening #11857 as a new pull request because the last one stopped loading for people ------------------------------- This refactors the tx index code to be in it's own class and get built concurrently with validation code. The main benefit is decoupling and moving the txindex into a separate DB. The primary motivation is to lay the groundwork for other indexers that might be desired (such as the [compact filters](https://github.com/bitcoin/bips/pull/636)). The basic idea is that the TxIndex spins up its own thread, which first syncs the txindex to the current block index, then once in sync the BlockConnected ValidationInterface hook writes new blocks. ### DB changes At the suggestion of some other developers, the txindex has been split out into a separate database. A data migration runs at startup on any nodes with a legacy txindex. Currently the migration blocks node initialization until complete. ### Open questions - Should the migration of txindex data from the old DB to the new DB block in init or should it happen in a background thread? The downside to backgrounding it is that `getrawtransaction` would return an error message saying the txindex is syncing while the migration is running. ### Impact In a sample size n=1 test where I synced nodes from scratch, the average time [Index writing](https://github.com/bitcoin/bitcoin/blob/master/src/validation.cpp#L1903) was 3.36ms in master and 1.72ms in this branch. The average time between `UpdateTip` log lines for sequential blocks between 400,000 and IBD end on mainnet was 0.297204s in master and 0.286134s in this branch. Most likely this is just variance in IBD times, but I can try with some more trials if people want. Tree-SHA512: 451fd7d95df89dfafceaa723cdf0f7b137615b531cf5c5035cfb54e9ccc2026cec5ac85edbcf71b7f4e2f102e36e9202b8b3a667e1504a9e1a9976ab1f0079c4
2021-05-25 12:48:04 +02:00
("src/index/txindex.cpp", "FatalError(const char* fmt, const Args&... args)"),
("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"),
2020-07-29 15:29:43 +02:00
("src/qt/networkstyle.cpp", "strprintf(appName, gArgs.GetDevNetName())"),
("src/qt/networkstyle.cpp", "strprintf(titleAddText, gArgs.GetDevNetName())"),
("src/rpc/evo.cpp", "strprintf(it->second, nParamNum)"),
2020-07-29 15:29:43 +02:00
("src/stacktraces.cpp", "strprintf(fmtStr, i, si.pc, lstr, fstr)"),
("src/util/system.cpp", "strprintf(_(COPYRIGHT_HOLDERS).translated, COPYRIGHT_HOLDERS_SUBSTITUTION)"),
("src/validationinterface.cpp", "LogPrint(BCLog::VALIDATION, fmt \"\\n\", __VA_ARGS__)"),
("src/wallet/wallet.h", "WalletLogPrintf(std::string fmt, Params... parameters)"),
("src/wallet/wallet.h", "LogPrintf((\"%s \" + fmt).c_str(), GetDisplayName(), parameters...)"),
Merge #17261: Make ScriptPubKeyMan an actual interface and the wallet to have multiple 3f373659d732a5b1e5fdc692a45b2b8179f66bec Refactor: Replace SigningProvider pointers with unique_ptrs (Andrew Chow) 3afe53c4039103670cec5f9cace897ead76e20a8 Cleanup: Drop unused GUI learnRelatedScripts method (Andrew Chow) e2f02aa59e3402048269362ff692d49a6df35cfd Refactor: Copy CWallet signals and print function to LegacyScriptPubKeyMan (Andrew Chow) c729afd0a3b74a3943e4c359270beaf3e6ff8a7b Box the wallet: Add multiple keyman maps and loops (Andrew Chow) 4977c30d59e88a3e5ee248144bcc023debcd895b refactor: define a UINT256_ONE global constant (Andrew Chow) 415afcccd3e5583defdb76e3a280f48e98983301 HD Split: Avoid redundant upgrades (Andrew Chow) 01b4511206e399981a77976deb15785d18db46ae Make UpgradeKeyMetadata work only on LegacyScriptPubKeyMan (Andrew Chow) 4a7e43e8460127a40a7895519587399feff3b682 Store p2sh scripts in AddAndGetDestinationForScript (Andrew Chow) 501acb5538008d98abe79288b92040bc186b93f3 Always try to sign for all pubkeys in multisig (Andrew Chow) 81610eddbc57c46ae243f45d73e715d509f53a6c List output types in an array in order to be iterated over (Andrew Chow) eb81fc3ee58d3e88af36d8091b9e4017a8603b3c Refactor: Allow LegacyScriptPubKeyMan to be null (Andrew Chow) fadc08ad944cad42e805228cdd58e0332f4d7184 Locking: Lock cs_KeyStore instead of cs_wallet in legacy keyman (Andrew Chow) f5be479694d4dbaf59eef562d80fbeacb3bb7dc1 wallet: Improve CWallet:MarkDestinationsDirty (João Barbosa) Pull request description: Continuation of wallet boxes project. Actually makes ScriptPubKeyMan an interface which LegacyScriptPubkeyMan. Moves around functions and things from CWallet into LegacyScriptPubKeyMan so that they are actually separate things without circular dependencies. *** Introducing the `ScriptPubKeyMan` (short for ScriptPubKeyManager) for managing scriptPubKeys and their associated scripts and keys. This functionality is moved over from `CWallet`. Instead, `CWallet` will have a pointer to a `ScriptPubKeyMan` for every possible address type, internal and external. It will fetch the correct `ScriptPubKeyMan` as necessary. When fetching new addresses, it chooses the `ScriptPubKeyMan` based on address type and whether it is change. For signing, it takes the script and asks each `ScriptPubKeyMan` for whether that `ScriptPubKeyMan` considers that script `IsMine`, whether it has that script, or whether it is able to produce a signature for it. If so, the `ScriptPubKeyMan` will provide a `SigningProvider` to the caller which will use that in order to sign. There is currently one `ScriptPubKeyMan` - the `LegacyScriptPubKeyMan`. Each `CWallet` will have only one `LegacyScriptPubKeyMan` with the pointers for all of the address types and change pointing to this `LegacyScriptPubKeyMan`. It is created when the wallet is loaded and all keys and metadata are loaded into it instead of `CWallet`. The `LegacyScriptPubKeyMan` is primarily made up of all of the key and script management that used to be in `CWallet`. For convenience, `CWallet` has a `GetLegacyScriptPubKeyMan` which will return the `LegacyScriptPubKeyMan` or a `nullptr` if it does not have one (not yet implemented, but callers will check for the `nullptr`). For purposes of signing, `LegacyScriptPubKeyMan`'s `GetSigningProvider` will return itself rather than a separate `SigningProvider`. This will be different for future `ScriptPubKeyMan`s. The `LegacyScriptPubKeyMan` will also handle the importing and exporting of keys and scripts instead of `CWallet`. As such, a number of RPCs have been limited to work only if a `LegacyScriptPubKeyMan` can be retrieved from the wallet. These RPCs are `sethdseed`, `addmultisigaddress`, `importaddress`, `importprivkey`, `importpubkey`, `importmulti`, `dumpprivkey`, and `dumpwallet`. Other RPCs which relied on the wallet for scripts and keys have been modified in order to take the `SigningProvider` retrieved from the `ScriptPubKeyMan` for a given script. Overall, these changes should not effect how everything actually works and the user should experience no difference between having this change and not having it. As such, no functional tests were changed, and the only unit tests changed were those that were directly accessing `CWallet` functions that have been removed. This PR is the last step in the [Wallet Structure Changes](https://github.com/bitcoin-core/bitcoin-devwiki/wiki/Wallet-Class-Structure-Changes). ACKs for top commit: instagibbs: re-utACK https://github.com/bitcoin/bitcoin/pull/17261/commits/3f373659d732a5b1e5fdc692a45b2b8179f66bec Sjors: re-utACK 3f373659d732a5b1e5fdc692a45b2b8179f66bec (it still compiles on macOS after https://github.com/bitcoin/bitcoin/pull/17261#discussion_r370377070) meshcollider: Tested re-ACK 3f373659d732a5b1e5fdc692a45b2b8179f66bec Tree-SHA512: f8e2b8d9efa750b617691e8702d217ec4c33569ec2554a060141d9eb9b9a3a5323e4216938e2485c44625d7a6e0925d40dea1362b3af9857cf08860c2f344716
2019-10-07 20:11:34 +02:00
("src/wallet/scriptpubkeyman.h", "WalletLogPrintf(std::string fmt, Params... parameters)"),
("src/wallet/scriptpubkeyman.h", "LogPrintf((\"%s \" + fmt).c_str(), m_storage.GetDisplayName(), parameters...)"),
("src/logging.h", "LogPrintf(const char* fmt, const Args&... args)"),
Merge #17260: Split some CWallet functions into new LegacyScriptPubKeyMan (#4938) * Move wallet enums to walletutil.h * MOVEONLY: Move key handling code out of wallet to keyman file Start moving wallet and ismine code to scriptpubkeyman.h, scriptpubkeyman.cpp The easiest way to review this commit is to run: git log -p -n1 --color-moved=dimmed_zebra And check that everything is a move (other than includes and copyrights comments). This commit is move-only and doesn't change code or affect behavior. * Refactor: Split up CWallet and LegacyScriptPubKeyMan and classes This moves CWallet members and methods dealing with keys to a new LegacyScriptPubKeyMan class, and updates calling code to reference the new class instead of CWallet. Most of the changes are simple text replacements and variable substitutions easily verified with: git log -p -n1 -U0 --word-diff-regex=. The only nontrivial chunk of code added is the new LegacyScriptPubKeyMan class declaration, but this code isn't new and is just selectively copied and moved from the previous CWallet class declaration. This can be verified with: git log -p -n1 --color-moved=dimmed_zebra src/wallet/scriptpubkeyman.h src/wallet/wallet.h or git diff HEAD~1:src/wallet/wallet.h HEAD:src/wallet/scriptpubkeyman.h This commit does not change behavior. * Renamed classes in scriptpubkeyman * Fixes for conflicts, compilation and linkage errors due to previous commits * Reordered methods in scriptpubkeyman to make further backports easier * Reordered methods in scriptpubkeyman to make further backports easier (part II) * Remove HDChain copy from SigningProvider class * fixes/suggestions Co-authored-by: Andrew Chow <achow101-github@achow101.com> Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>
2022-08-08 18:05:21 +02:00
("src/wallet/scriptpubkeyman.h", "WalletLogPrintf(const std::string& fmt, const Params&... parameters)"),
]
def parse_function_calls(function_name, source_code):
"""Return an array with all calls to function function_name in string source_code.
Preprocessor directives and C++ style comments ("//") in source_code are removed.
>>> len(parse_function_calls("foo", "foo();bar();foo();bar();"))
2
>>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[0].startswith("foo(1);")
True
>>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[1].startswith("foo(2);")
True
>>> len(parse_function_calls("foo", "foo();bar();// foo();bar();"))
1
>>> len(parse_function_calls("foo", "#define FOO foo();"))
0
"""
2021-08-27 21:03:02 +02:00
assert type(function_name) is str and type(source_code) is str and function_name
lines = [re.sub("// .*", " ", line).strip()
for line in source_code.split("\n")
if not line.strip().startswith("#")]
return re.findall(r"[^a-zA-Z_](?=({}\(.*).*)".format(function_name), " " + " ".join(lines))
def normalize(s):
"""Return a normalized version of string s with newlines, tabs and C style comments ("/* ... */")
replaced with spaces. Multiple spaces are replaced with a single space.
>>> normalize(" /* nothing */ foo\tfoo /* bar */ foo ")
'foo foo foo'
"""
2021-08-27 21:03:02 +02:00
assert type(s) is str
s = s.replace("\n", " ")
s = s.replace("\t", " ")
s = re.sub(r"/\*.*?\*/", " ", s)
s = re.sub(" {2,}", " ", s)
return s.strip()
ESCAPE_MAP = {
r"\n": "[escaped-newline]",
r"\t": "[escaped-tab]",
r'\"': "[escaped-quote]",
}
def escape(s):
"""Return the escaped version of string s with "\\\"", "\\n" and "\\t" escaped as
"[escaped-backslash]", "[escaped-newline]" and "[escaped-tab]".
>>> unescape(escape("foo")) == "foo"
True
>>> escape(r'foo \\t foo \\n foo \\\\ foo \\ foo \\"bar\\"')
'foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]'
"""
2021-08-27 21:03:02 +02:00
assert type(s) is str
for raw_value, escaped_value in ESCAPE_MAP.items():
s = s.replace(raw_value, escaped_value)
return s
def unescape(s):
"""Return the unescaped version of escaped string s.
Reverses the replacements made in function escape(s).
>>> unescape(escape("bar"))
'bar'
>>> unescape("foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]")
'foo \\\\t foo \\\\n foo \\\\\\\\ foo \\\\ foo \\\\"bar\\\\"'
"""
2021-08-27 21:03:02 +02:00
assert type(s) is str
for raw_value, escaped_value in ESCAPE_MAP.items():
s = s.replace(escaped_value, raw_value)
return s
def parse_function_call_and_arguments(function_name, function_call):
"""Split string function_call into an array of strings consisting of:
* the string function_call followed by "("
* the function call argument #1
* ...
* the function call argument #n
* a trailing ");"
The strings returned are in escaped form. See escape(...).
>>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");')
['foo(', '"%s",', ' "foo"', ')']
>>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");')
['foo(', '"%s",', ' "foo"', ')']
>>> parse_function_call_and_arguments("foo", 'foo("%s %s", "foo", "bar");')
['foo(', '"%s %s",', ' "foo",', ' "bar"', ')']
>>> parse_function_call_and_arguments("fooprintf", 'fooprintf("%050d", i);')
['fooprintf(', '"%050d",', ' i', ')']
>>> parse_function_call_and_arguments("foo", 'foo(bar(foobar(barfoo("foo"))), foobar); barfoo')
['foo(', 'bar(foobar(barfoo("foo"))),', ' foobar', ')']
>>> parse_function_call_and_arguments("foo", "foo()")
['foo(', '', ')']
>>> parse_function_call_and_arguments("foo", "foo(123)")
['foo(', '123', ')']
>>> parse_function_call_and_arguments("foo", 'foo("foo")')
['foo(', '"foo"', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>,wchar_t>().to_bytes(buf), err);')
['strprintf(', '"%s (%d)",', ' std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>,wchar_t>().to_bytes(buf),', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo<wchar_t>().to_bytes(buf), err);')
['strprintf(', '"%s (%d)",', ' foo<wchar_t>().to_bytes(buf),', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo().to_bytes(buf), err);')
['strprintf(', '"%s (%d)",', ' foo().to_bytes(buf),', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo << 1, err);')
['strprintf(', '"%s (%d)",', ' foo << 1,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo<bar>() >> 1, err);')
['strprintf(', '"%s (%d)",', ' foo<bar>() >> 1,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo < 1 ? bar : foobar, err);')
['strprintf(', '"%s (%d)",', ' foo < 1 ? bar : foobar,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo < 1, err);')
['strprintf(', '"%s (%d)",', ' foo < 1,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo > 1 ? bar : foobar, err);')
['strprintf(', '"%s (%d)",', ' foo > 1 ? bar : foobar,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo > 1, err);')
['strprintf(', '"%s (%d)",', ' foo > 1,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo <= 1, err);')
['strprintf(', '"%s (%d)",', ' foo <= 1,', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo <= bar<1, 2>(1, 2), err);')
['strprintf(', '"%s (%d)",', ' foo <= bar<1, 2>(1, 2),', ' err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo>foo<1,2>(1,2)?bar:foobar,err)');
['strprintf(', '"%s (%d)",', ' foo>foo<1,2>(1,2)?bar:foobar,', 'err', ')']
>>> parse_function_call_and_arguments("strprintf", 'strprintf("%s (%d)", foo>foo<1,2>(1,2),err)');
['strprintf(', '"%s (%d)",', ' foo>foo<1,2>(1,2),', 'err', ')']
"""
2021-08-27 21:03:02 +02:00
assert type(function_name) is str and type(function_call) is str and function_name
remaining = normalize(escape(function_call))
expected_function_call = "{}(".format(function_name)
2021-08-27 21:03:02 +02:00
assert remaining.startswith(expected_function_call)
parts = [expected_function_call]
remaining = remaining[len(expected_function_call):]
open_parentheses = 1
open_template_arguments = 0
in_string = False
parts.append("")
for i, char in enumerate(remaining):
parts.append(parts.pop() + char)
if char == "\"":
in_string = not in_string
continue
if in_string:
continue
if char == "(":
open_parentheses += 1
continue
if char == ")":
open_parentheses -= 1
if open_parentheses > 1:
continue
if open_parentheses == 0:
parts.append(parts.pop()[:-1])
parts.append(char)
break
prev_char = remaining[i - 1] if i - 1 >= 0 else None
next_char = remaining[i + 1] if i + 1 <= len(remaining) - 1 else None
if char == "<" and next_char not in [" ", "<", "="] and prev_char not in [" ", "<"]:
open_template_arguments += 1
continue
if char == ">" and next_char not in [" ", ">", "="] and prev_char not in [" ", ">"] and open_template_arguments > 0:
open_template_arguments -= 1
if open_template_arguments > 0:
continue
if char == ",":
parts.append("")
return parts
def parse_string_content(argument):
"""Return the text within quotes in string argument.
>>> parse_string_content('1 "foo %d bar" 2')
'foo %d bar'
>>> parse_string_content('1 foobar 2')
''
>>> parse_string_content('1 "bar" 2')
'bar'
>>> parse_string_content('1 "foo" 2 "bar" 3')
'foobar'
>>> parse_string_content('1 "foo" 2 " " "bar" 3')
'foo bar'
>>> parse_string_content('""')
''
>>> parse_string_content('')
''
>>> parse_string_content('1 2 3')
''
"""
2021-08-27 21:03:02 +02:00
assert type(argument) is str
string_content = ""
in_string = False
for char in normalize(escape(argument)):
if char == "\"":
in_string = not in_string
elif in_string:
string_content += char
return string_content
def count_format_specifiers(format_string):
"""Return the number of format specifiers in string format_string.
>>> count_format_specifiers("foo bar foo")
0
>>> count_format_specifiers("foo %d bar foo")
1
>>> count_format_specifiers("foo %d bar %i foo")
2
>>> count_format_specifiers("foo %d bar %i foo %% foo")
2
>>> count_format_specifiers("foo %d bar %i foo %% foo %d foo")
3
>>> count_format_specifiers("foo %d bar %i foo %% foo %*d foo")
4
"""
2021-08-27 21:03:02 +02:00
assert type(format_string) is str
format_string = format_string.replace('%%', 'X')
n = 0
in_specifier = False
for i, char in enumerate(format_string):
if char == "%":
in_specifier = True
n += 1
elif char in "aAcdeEfFgGinopsuxX":
in_specifier = False
elif in_specifier and char == "*":
n += 1
return n
def handle_filename(filename, args):
exit_code = 0
with open(filename, "r", encoding="utf-8") as f:
for function_call_str in parse_function_calls(args.function_name, f.read()):
parts = parse_function_call_and_arguments(args.function_name, function_call_str)
relevant_function_call_str = unescape("".join(parts))[:512]
if (f.name, relevant_function_call_str) in FALSE_POSITIVES:
continue
if len(parts) < 3 + args.skip_arguments:
exit_code = 1
print("{}: Could not parse function call string \"{}(...)\": {}".format(f.name, args.function_name, relevant_function_call_str))
continue
argument_count = len(parts) - 3 - args.skip_arguments
format_str = parse_string_content(parts[1 + args.skip_arguments])
format_specifier_count = count_format_specifiers(format_str)
if format_specifier_count != argument_count:
exit_code = 1
print("{}: Expected {} argument(s) after format string but found {} argument(s): {}".format(f.name, format_specifier_count, argument_count, relevant_function_call_str))
continue
return exit_code
def main():
parser = argparse.ArgumentParser(description="This program checks that the number of arguments passed "
"to a variadic format string function matches the number of format "
"specifiers in the format string.")
parser.add_argument("--skip-arguments", type=int, help="number of arguments before the format string "
"argument (e.g. 1 in the case of fprintf)", default=0)
parser.add_argument("function_name", help="function name (e.g. fprintf)", default=None)
parser.add_argument("file", nargs="*", help="C++ source code file (e.g. foo.cpp)")
args = parser.parse_args()
exit_codes = []
with Pool(8) as pool:
exit_codes = pool.map(partial(handle_filename, args=args), args.file)
sys.exit(max(exit_codes))
if __name__ == "__main__":
main()