ci: retry non-deterministic tests and report failure only if limit exhausted (#4793)

## Motivation

Dash Core has a series of functional tests that do not behave in a
deterministic fashion and have a higher chance of failure, especially on
resource limited systems. This results in a lot of false-negatives
during CI runs, prompting the need to re-run them, which is annoying at
best and generates apathy towards CI failure at worst.

## History

The first approach was to isolate non-deterministic tests into their own
distinct GItLab runner, making it such that if a test failed, only that
one runner had to be restarted, instead of the multiple runners that
failed due to these tests.

One problem with this was that this approach effectively omitted these
tests from TSan and UBSan coverage as attempting to combine TSan and
UBSan would cause significant resource exhaustion.

## Description

An alternative approach is to introduce a new flag, `--retries`,
applicable only on non-deterministic tests, that allow a failed test to
be repeated up to a set number of times (default: 3), only reporting
failure once the limit is exhausted.

A limitation of this is that only the log dump from the last attempt
will be available.

---------

Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>
This commit is contained in:
Kittywhiskers Van Gogh 2023-04-07 10:25:52 +05:30 committed by GitHub
parent b0f7612f2c
commit cc7be07fb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -81,11 +81,38 @@ EXTENDED_SCRIPTS = [
'feature_dbcrash.py', 'feature_dbcrash.py',
] ]
BASE_SCRIPTS = [ NONDETERMINISTIC_SCRIPTS = [
# Scripts that are run by default. # Scripts that are run by default.
# Longest test should go first, to favor running tests in parallel # Longest test should go first, to favor running tests in parallel
'feature_dip3_deterministicmns.py', # NOTE: needs dash_hash to pass 'feature_dip3_deterministicmns.py', # NOTE: needs dash_hash to pass
'feature_llmq_data_recovery.py', 'feature_llmq_data_recovery.py',
# vv Tests less than 2m vv
'feature_dip3_v19.py',
'feature_llmq_signing.py', # NOTE: needs dash_hash to pass
'feature_llmq_signing.py --spork21', # NOTE: needs dash_hash to pass
'feature_llmq_chainlocks.py', # NOTE: needs dash_hash to pass
'feature_llmq_rotation.py', # NOTE: needs dash_hash to pass
'feature_llmq_connections.py', # NOTE: needs dash_hash to pass
'feature_llmq_hpmn.py', # NOTE: needs dash_hash to pass
'feature_llmq_simplepose.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_cl_conflicts.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_migration.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_retroactive.py', # NOTE: needs dash_hash to pass
'feature_llmq_dkgerrors.py', # NOTE: needs dash_hash to pass
'feature_dip4_coinbasemerkleroots.py', # NOTE: needs dash_hash to pass
# vv Tests less than 60s vv
'wallet_listreceivedby.py',
# vv Tests less than 30s vv
'feature_dip0020_activation.py',
'interface_zmq_dash.py',
'mempool_unbroadcast.py',
'p2p_addrv2_relay.py',
'rpc_net.py',
]
BASE_SCRIPTS = [
# Scripts that are run by default.
# Longest test should go first, to favor running tests in parallel
'wallet_hd.py', 'wallet_hd.py',
'wallet_backup.py', 'wallet_backup.py',
# vv Tests less than 5m vv # vv Tests less than 5m vv
@ -107,26 +134,12 @@ BASE_SCRIPTS = [
'wallet_dump.py', 'wallet_dump.py',
'wallet_listtransactions.py', 'wallet_listtransactions.py',
'feature_multikeysporks.py', 'feature_multikeysporks.py',
'feature_dip3_v19.py',
'feature_llmq_signing.py', # NOTE: needs dash_hash to pass
'feature_llmq_signing.py --spork21', # NOTE: needs dash_hash to pass
'feature_llmq_chainlocks.py', # NOTE: needs dash_hash to pass
'feature_llmq_rotation.py', # NOTE: needs dash_hash to pass
'feature_llmq_connections.py', # NOTE: needs dash_hash to pass
'feature_llmq_hpmn.py', # NOTE: needs dash_hash to pass
'feature_llmq_simplepose.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_cl_conflicts.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_migration.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_retroactive.py', # NOTE: needs dash_hash to pass
'feature_llmq_dkgerrors.py', # NOTE: needs dash_hash to pass
'feature_dip4_coinbasemerkleroots.py', # NOTE: needs dash_hash to pass
# vv Tests less than 60s vv # vv Tests less than 60s vv
'p2p_sendheaders.py', # NOTE: needs dash_hash to pass 'p2p_sendheaders.py', # NOTE: needs dash_hash to pass
'p2p_sendheaders_compressed.py', # NOTE: needs dash_hash to pass 'p2p_sendheaders_compressed.py', # NOTE: needs dash_hash to pass
'wallet_importmulti.py', 'wallet_importmulti.py',
'mempool_limit.py', 'mempool_limit.py',
'rpc_txoutproof.py', 'rpc_txoutproof.py',
'wallet_listreceivedby.py',
'wallet_abandonconflict.py', 'wallet_abandonconflict.py',
'feature_csv_activation.py', 'feature_csv_activation.py',
'rpc_rawtransaction.py', 'rpc_rawtransaction.py',
@ -136,7 +149,6 @@ BASE_SCRIPTS = [
'rpc_quorum.py', 'rpc_quorum.py',
'wallet_keypool_topup.py', 'wallet_keypool_topup.py',
'feature_fee_estimation.py', 'feature_fee_estimation.py',
'interface_zmq_dash.py',
'interface_zmq.py', 'interface_zmq.py',
'interface_bitcoin_cli.py', 'interface_bitcoin_cli.py',
'mempool_resurrect.py', 'mempool_resurrect.py',
@ -163,7 +175,6 @@ BASE_SCRIPTS = [
'rpc_whitelist.py', 'rpc_whitelist.py',
'feature_proxy.py', 'feature_proxy.py',
'rpc_signrawtransaction.py', 'rpc_signrawtransaction.py',
'p2p_addrv2_relay.py',
'wallet_groups.py', 'wallet_groups.py',
'p2p_disconnect_ban.py', 'p2p_disconnect_ban.py',
'feature_addressindex.py', 'feature_addressindex.py',
@ -174,7 +185,6 @@ BASE_SCRIPTS = [
'rpc_deprecated.py', 'rpc_deprecated.py',
'wallet_disable.py', 'wallet_disable.py',
'p2p_getdata.py', 'p2p_getdata.py',
'rpc_net.py',
'wallet_keypool.py', 'wallet_keypool.py',
'wallet_keypool_hd.py', 'wallet_keypool_hd.py',
'p2p_mempool.py', 'p2p_mempool.py',
@ -245,7 +255,6 @@ BASE_SCRIPTS = [
'wallet_create_tx.py', 'wallet_create_tx.py',
'p2p_fingerprint.py', 'p2p_fingerprint.py',
'rpc_platform_filter.py', 'rpc_platform_filter.py',
'feature_dip0020_activation.py',
'feature_uacomment.py', 'feature_uacomment.py',
'wallet_coinbase_category.py', 'wallet_coinbase_category.py',
'feature_filelock.py', 'feature_filelock.py',
@ -253,7 +262,6 @@ BASE_SCRIPTS = [
'p2p_blockfilters.py', 'p2p_blockfilters.py',
'feature_asmap.py', 'feature_asmap.py',
'feature_includeconf.py', 'feature_includeconf.py',
'mempool_unbroadcast.py',
'rpc_deriveaddresses.py', 'rpc_deriveaddresses.py',
'rpc_deriveaddresses.py --usecli', 'rpc_deriveaddresses.py --usecli',
'rpc_scantxoutset.py', 'rpc_scantxoutset.py',
@ -271,8 +279,8 @@ BASE_SCRIPTS = [
# Put them in a random line within the section that fits their approximate run-time # Put them in a random line within the section that fits their approximate run-time
] ]
# Place EXTENDED_SCRIPTS first since it has the 3 longest running tests # Place NONDETERMINISTIC_SCRIPTS first since it has the 3 longest running tests
ALL_SCRIPTS = EXTENDED_SCRIPTS + BASE_SCRIPTS ALL_SCRIPTS = NONDETERMINISTIC_SCRIPTS + EXTENDED_SCRIPTS + BASE_SCRIPTS
NON_SCRIPTS = [ NON_SCRIPTS = [
# These are python files that live in the functional tests directory, but are not test scripts. # These are python files that live in the functional tests directory, but are not test scripts.
@ -302,6 +310,8 @@ def main():
parser.add_argument('--tmpdirprefix', '-t', default=tempfile.gettempdir(), help="Root directory for datadirs") parser.add_argument('--tmpdirprefix', '-t', default=tempfile.gettempdir(), help="Root directory for datadirs")
parser.add_argument('--failfast', '-F', action='store_true', help='stop execution after the first test failure') parser.add_argument('--failfast', '-F', action='store_true', help='stop execution after the first test failure')
parser.add_argument('--filter', help='filter scripts to run by regular expression') parser.add_argument('--filter', help='filter scripts to run by regular expression')
parser.add_argument('--retries', '-r', type=int, default=3, help='how many reattempts should be allowed for the non-deterministic test suite. Default=3.')
parser.add_argument('--sleep', '-s', type=int, default=15, help='how many seconds should the test sleep before reattempting (in seconds). Default=15.')
args, unknown_args = parser.parse_known_args() args, unknown_args = parser.parse_known_args()
if not args.ansi: if not args.ansi:
@ -407,6 +417,8 @@ def main():
build_dir=config["environment"]["BUILDDIR"], build_dir=config["environment"]["BUILDDIR"],
tmpdir=tmpdir, tmpdir=tmpdir,
jobs=args.jobs, jobs=args.jobs,
retries=args.retries,
retry_sleep=args.sleep,
enable_coverage=args.coverage, enable_coverage=args.coverage,
args=passon_args, args=passon_args,
combined_logs_len=args.combinedlogslen, combined_logs_len=args.combinedlogslen,
@ -415,7 +427,7 @@ def main():
use_term_control=args.ansi, use_term_control=args.ansi,
) )
def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, enable_coverage=False, args=None, combined_logs_len=0,failfast=False, runs_ci=False, use_term_control): def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, retries=3, retry_sleep=15, enable_coverage=False, args=None, combined_logs_len=0,failfast=False, runs_ci=False, use_term_control):
args = args or [] args = args or []
# Warn if dashd is already running # Warn if dashd is already running
@ -476,15 +488,18 @@ def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, enable_coverage=
max_len_name = len(max(test_list, key=len)) max_len_name = len(max(test_list, key=len))
test_count = len(test_list) test_count = len(test_list)
for i in range(test_count):
test_result, testdir, stdout, stderr = job_queue.get_next() def run_test(test_result, testdir, stdout, stderr, attempt, limit):
test_results.append(test_result)
done_str = "{}/{} - {}{}{}".format(i + 1, test_count, BOLD[1], test_result.name, BOLD[0])
if test_result.status == "Passed": if test_result.status == "Passed":
logging.debug("%s passed, Duration: %s s" % (done_str, test_result.time)) logging.debug("%s passed, Duration: %s s" % (done_str, test_result.time))
return True
elif test_result.status == "Skipped": elif test_result.status == "Skipped":
logging.debug("%s skipped" % (done_str)) logging.debug("%s skipped" % (done_str))
return True
else: else:
if limit > 1:
logging.debug("%s failed at attempt %s of %s, Duration: %s s\n" % (done_str, attempt, limit, test_result.time))
if limit == attempt:
print("%s failed, Duration: %s s\n" % (done_str, test_result.time)) print("%s failed, Duration: %s s\n" % (done_str, test_result.time))
print(BOLD[1] + 'stdout:\n' + BOLD[0] + stdout + '\n') print(BOLD[1] + 'stdout:\n' + BOLD[0] + stdout + '\n')
print(BOLD[1] + 'stderr:\n' + BOLD[0] + stderr + '\n') print(BOLD[1] + 'stderr:\n' + BOLD[0] + stderr + '\n')
@ -499,8 +514,23 @@ def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, enable_coverage=
combined_logs_args += ['--color'] combined_logs_args += ['--color']
combined_logs, _ = subprocess.Popen(combined_logs_args, universal_newlines=True, stdout=subprocess.PIPE).communicate() combined_logs, _ = subprocess.Popen(combined_logs_args, universal_newlines=True, stdout=subprocess.PIPE).communicate()
print("\n".join(deque(combined_logs.splitlines(), combined_logs_len))) print("\n".join(deque(combined_logs.splitlines(), combined_logs_len)))
else:
time.sleep(retry_sleep)
return False
if failfast: for i in range(test_count):
test_result, testdir, stdout, stderr = job_queue.get_next()
done_str = "{}/{} - {}{}{}".format(i + 1, test_count, BOLD[1], test_result.name, BOLD[0])
retry_limit = retries if test_result.name in NONDETERMINISTIC_SCRIPTS else 1
test_status = False
for itx in range(retry_limit):
test_status = run_test(test_result, testdir, stdout, stderr, itx + 1, retry_limit)
# Only append result if test either passes or fails till retry_limit, pushing only
# the result of the last attempt to test_results.
if test_status or retry_limit == itx + 1:
test_results.append(test_result)
break
if failfast and not test_status:
logging.debug("Early exiting after test failure") logging.debug("Early exiting after test failure")
break break