fix(tests): Fix retries for the non-deterministic test suite (#5307)

## Issue being fixed or feature implemented
the problem with retries implemented in #4793 is that they don't really
do anything besides fetching results of a failed job multiple times 🙈

## What was done?
partially reverted changes done in #4793, implemented actual job
restart. dropped `--sleep` and `--retries` and added `--attempts`
instead.

## How Has This Been Tested?
Pick any test, add some randomly failing expression somewhere and run it
with some high number of retries.

For example:
```diff
diff --git a/test/functional/feature_dip0020_activation.py b/test/functional/feature_dip0020_activation.py
index 471e4fdc66..b56a954b78 100755
--- a/test/functional/feature_dip0020_activation.py
+++ b/test/functional/feature_dip0020_activation.py
@@ -69,6 +69,7 @@ class DIP0020ActivationTest(BitcoinTestFramework):
         # Should be spendable now
         tx0id = self.node.sendrawtransaction(tx0_hex)
         assert tx0id in set(self.node.getrawmempool())
+        assert int(tx0id[0], 16) < 4
 
 
 if __name__ == '__main__':
```

On develop:
```
./test/functional/test_runner.py feature_dip0020_activation.py --retries=100 --sleep=0
```
if this fails on the first run, it keeps "failing" (simply fetching the
same results actually) till the end.

With this patch:
```
./test/functional/test_runner.py feature_dip0020_activation.py --attempts=100
```
if this fails on the first run, it can actually succeed after a few
attempts now, unless you are extremely unlucky ofc 😄

Also, check [ci results in my repo
](https://cdn.artifacts.gitlab-static.net/93/b4/93b4f8b17e5dcccab1afee165b4d74d90f05800caf65d6c48a83a1a78c979587/2023_04_08/4081291268/4478867166/job.log?response-content-type=text%2Fplain%3B%20charset%3Dutf-8&response-content-disposition=inline&Expires=1680945516&KeyName=gprd-artifacts-cdn&Signature=2d4mHCJBbgRaTDiSQ6kKIy1PdIM=).

Note:
```
...
feature_dip3_v19.py failed at attempt 1/3, Duration: 159s
...
4/179 - feature_dip3_v19.py passed, Duration: 244 s
...
feature_llmq_hpmn.py failed at attempt 1/3, Duration: 284s
...
feature_llmq_hpmn.py failed at attempt 2/3, Duration: 296s
...
11/179 - feature_llmq_hpmn.py failed, Duration: 233 s
...
```

An example with 2 tests failing initially and then passing:
https://gitlab.com/dashpay/dash/-/jobs/4089689970

## Breaking Changes
n/a


## Checklist:
- [x] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have added or updated relevant unit/integration/functional/e2e
tests
- [ ] I have made corresponding changes to the documentation

**For repository code-owners and collaborators only**
- [x] I have assigned this pull request to a milestone
This commit is contained in:
UdjinM6 2023-04-15 07:13:47 +03:00 committed by GitHub
parent 9aa886cd4d
commit 33f490e615
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 80 deletions

View File

@ -37,7 +37,7 @@ echo "Using socketevents mode: $SOCKETEVENTS"
EXTRA_ARGS="--dashd-arg=-socketevents=$SOCKETEVENTS" EXTRA_ARGS="--dashd-arg=-socketevents=$SOCKETEVENTS"
set +e set +e
./test/functional/test_runner.py --ci --ansi --combinedlogslen=4000 ${TEST_RUNNER_EXTRA} --failfast --nocleanup --tmpdir=$(pwd)/testdatadirs $PASS_ARGS $EXTRA_ARGS ./test/functional/test_runner.py --ci --attempts=3 --ansi --combinedlogslen=4000 ${TEST_RUNNER_EXTRA} --failfast --nocleanup --tmpdir=$(pwd)/testdatadirs $PASS_ARGS $EXTRA_ARGS
RESULT=$? RESULT=$?
set -e set -e

View File

@ -81,38 +81,11 @@ EXTENDED_SCRIPTS = [
'feature_dbcrash.py', 'feature_dbcrash.py',
] ]
NONDETERMINISTIC_SCRIPTS = [ BASE_SCRIPTS = [
# Scripts that are run by default. # Scripts that are run by default.
# Longest test should go first, to favor running tests in parallel # Longest test should go first, to favor running tests in parallel
'feature_dip3_deterministicmns.py', # NOTE: needs dash_hash to pass 'feature_dip3_deterministicmns.py', # NOTE: needs dash_hash to pass
'feature_llmq_data_recovery.py', 'feature_llmq_data_recovery.py',
# vv Tests less than 2m vv
'feature_dip3_v19.py',
'feature_llmq_signing.py', # NOTE: needs dash_hash to pass
'feature_llmq_signing.py --spork21', # NOTE: needs dash_hash to pass
'feature_llmq_chainlocks.py', # NOTE: needs dash_hash to pass
'feature_llmq_rotation.py', # NOTE: needs dash_hash to pass
'feature_llmq_connections.py', # NOTE: needs dash_hash to pass
'feature_llmq_hpmn.py', # NOTE: needs dash_hash to pass
'feature_llmq_simplepose.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_cl_conflicts.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_migration.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_retroactive.py', # NOTE: needs dash_hash to pass
'feature_llmq_dkgerrors.py', # NOTE: needs dash_hash to pass
'feature_dip4_coinbasemerkleroots.py', # NOTE: needs dash_hash to pass
# vv Tests less than 60s vv
'wallet_listreceivedby.py',
# vv Tests less than 30s vv
'feature_dip0020_activation.py',
'interface_zmq_dash.py',
'mempool_unbroadcast.py',
'p2p_addrv2_relay.py',
'rpc_net.py',
]
BASE_SCRIPTS = [
# Scripts that are run by default.
# Longest test should go first, to favor running tests in parallel
'wallet_hd.py', 'wallet_hd.py',
'wallet_backup.py', 'wallet_backup.py',
# vv Tests less than 5m vv # vv Tests less than 5m vv
@ -134,12 +107,26 @@ BASE_SCRIPTS = [
'wallet_dump.py', 'wallet_dump.py',
'wallet_listtransactions.py', 'wallet_listtransactions.py',
'feature_multikeysporks.py', 'feature_multikeysporks.py',
'feature_dip3_v19.py',
'feature_llmq_signing.py', # NOTE: needs dash_hash to pass
'feature_llmq_signing.py --spork21', # NOTE: needs dash_hash to pass
'feature_llmq_chainlocks.py', # NOTE: needs dash_hash to pass
'feature_llmq_rotation.py', # NOTE: needs dash_hash to pass
'feature_llmq_connections.py', # NOTE: needs dash_hash to pass
'feature_llmq_hpmn.py', # NOTE: needs dash_hash to pass
'feature_llmq_simplepose.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_cl_conflicts.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_migration.py', # NOTE: needs dash_hash to pass
'feature_llmq_is_retroactive.py', # NOTE: needs dash_hash to pass
'feature_llmq_dkgerrors.py', # NOTE: needs dash_hash to pass
'feature_dip4_coinbasemerkleroots.py', # NOTE: needs dash_hash to pass
# vv Tests less than 60s vv # vv Tests less than 60s vv
'p2p_sendheaders.py', # NOTE: needs dash_hash to pass 'p2p_sendheaders.py', # NOTE: needs dash_hash to pass
'p2p_sendheaders_compressed.py', # NOTE: needs dash_hash to pass 'p2p_sendheaders_compressed.py', # NOTE: needs dash_hash to pass
'wallet_importmulti.py', 'wallet_importmulti.py',
'mempool_limit.py', 'mempool_limit.py',
'rpc_txoutproof.py', 'rpc_txoutproof.py',
'wallet_listreceivedby.py',
'wallet_abandonconflict.py', 'wallet_abandonconflict.py',
'feature_csv_activation.py', 'feature_csv_activation.py',
'rpc_rawtransaction.py', 'rpc_rawtransaction.py',
@ -149,6 +136,7 @@ BASE_SCRIPTS = [
'rpc_quorum.py', 'rpc_quorum.py',
'wallet_keypool_topup.py', 'wallet_keypool_topup.py',
'feature_fee_estimation.py', 'feature_fee_estimation.py',
'interface_zmq_dash.py',
'interface_zmq.py', 'interface_zmq.py',
'interface_bitcoin_cli.py', 'interface_bitcoin_cli.py',
'mempool_resurrect.py', 'mempool_resurrect.py',
@ -175,6 +163,7 @@ BASE_SCRIPTS = [
'rpc_whitelist.py', 'rpc_whitelist.py',
'feature_proxy.py', 'feature_proxy.py',
'rpc_signrawtransaction.py', 'rpc_signrawtransaction.py',
'p2p_addrv2_relay.py',
'wallet_groups.py', 'wallet_groups.py',
'p2p_disconnect_ban.py', 'p2p_disconnect_ban.py',
'feature_addressindex.py', 'feature_addressindex.py',
@ -185,6 +174,7 @@ BASE_SCRIPTS = [
'rpc_deprecated.py', 'rpc_deprecated.py',
'wallet_disable.py', 'wallet_disable.py',
'p2p_getdata.py', 'p2p_getdata.py',
'rpc_net.py',
'wallet_keypool.py', 'wallet_keypool.py',
'wallet_keypool_hd.py', 'wallet_keypool_hd.py',
'p2p_mempool.py', 'p2p_mempool.py',
@ -255,6 +245,7 @@ BASE_SCRIPTS = [
'wallet_create_tx.py', 'wallet_create_tx.py',
'p2p_fingerprint.py', 'p2p_fingerprint.py',
'rpc_platform_filter.py', 'rpc_platform_filter.py',
'feature_dip0020_activation.py',
'feature_uacomment.py', 'feature_uacomment.py',
'wallet_coinbase_category.py', 'wallet_coinbase_category.py',
'feature_filelock.py', 'feature_filelock.py',
@ -262,6 +253,7 @@ BASE_SCRIPTS = [
'p2p_blockfilters.py', 'p2p_blockfilters.py',
'feature_asmap.py', 'feature_asmap.py',
'feature_includeconf.py', 'feature_includeconf.py',
'mempool_unbroadcast.py',
'rpc_deriveaddresses.py', 'rpc_deriveaddresses.py',
'rpc_deriveaddresses.py --usecli', 'rpc_deriveaddresses.py --usecli',
'rpc_scantxoutset.py', 'rpc_scantxoutset.py',
@ -279,8 +271,8 @@ BASE_SCRIPTS = [
# Put them in a random line within the section that fits their approximate run-time # Put them in a random line within the section that fits their approximate run-time
] ]
# Place NONDETERMINISTIC_SCRIPTS first since it has the 3 longest running tests # Place EXTENDED_SCRIPTS first since it has the 3 longest running tests
ALL_SCRIPTS = NONDETERMINISTIC_SCRIPTS + EXTENDED_SCRIPTS + BASE_SCRIPTS ALL_SCRIPTS = EXTENDED_SCRIPTS + BASE_SCRIPTS
NON_SCRIPTS = [ NON_SCRIPTS = [
# These are python files that live in the functional tests directory, but are not test scripts. # These are python files that live in the functional tests directory, but are not test scripts.
@ -298,6 +290,7 @@ def main():
Help text and arguments for individual test script:''', Help text and arguments for individual test script:''',
formatter_class=argparse.RawTextHelpFormatter) formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--ansi', action='store_true', default=sys.stdout.isatty(), help="Use ANSI colors and dots in output (enabled by default when standard output is a TTY)") parser.add_argument('--ansi', action='store_true', default=sys.stdout.isatty(), help="Use ANSI colors and dots in output (enabled by default when standard output is a TTY)")
parser.add_argument('--attempts', '-a', type=int, default=1, help='how many attempts should be allowed for the non-deterministic test suite. Default=1.')
parser.add_argument('--combinedlogslen', '-c', type=int, default=0, metavar='n', help='On failure, print a log (of length n lines) to the console, combined from the test framework and all test nodes.') parser.add_argument('--combinedlogslen', '-c', type=int, default=0, metavar='n', help='On failure, print a log (of length n lines) to the console, combined from the test framework and all test nodes.')
parser.add_argument('--coverage', action='store_true', help='generate a basic coverage report for the RPC interface') parser.add_argument('--coverage', action='store_true', help='generate a basic coverage report for the RPC interface')
parser.add_argument('--ci', action='store_true', help='Run checks and code that are usually only enabled in a continuous integration environment') parser.add_argument('--ci', action='store_true', help='Run checks and code that are usually only enabled in a continuous integration environment')
@ -310,8 +303,6 @@ def main():
parser.add_argument('--tmpdirprefix', '-t', default=tempfile.gettempdir(), help="Root directory for datadirs") parser.add_argument('--tmpdirprefix', '-t', default=tempfile.gettempdir(), help="Root directory for datadirs")
parser.add_argument('--failfast', '-F', action='store_true', help='stop execution after the first test failure') parser.add_argument('--failfast', '-F', action='store_true', help='stop execution after the first test failure')
parser.add_argument('--filter', help='filter scripts to run by regular expression') parser.add_argument('--filter', help='filter scripts to run by regular expression')
parser.add_argument('--retries', '-r', type=int, default=3, help='how many reattempts should be allowed for the non-deterministic test suite. Default=3.')
parser.add_argument('--sleep', '-s', type=int, default=15, help='how many seconds should the test sleep before reattempting (in seconds). Default=15.')
args, unknown_args = parser.parse_known_args() args, unknown_args = parser.parse_known_args()
if not args.ansi: if not args.ansi:
@ -417,8 +408,7 @@ def main():
build_dir=config["environment"]["BUILDDIR"], build_dir=config["environment"]["BUILDDIR"],
tmpdir=tmpdir, tmpdir=tmpdir,
jobs=args.jobs, jobs=args.jobs,
retries=args.retries, attempts=args.attempts,
retry_sleep=args.sleep,
enable_coverage=args.coverage, enable_coverage=args.coverage,
args=passon_args, args=passon_args,
combined_logs_len=args.combinedlogslen, combined_logs_len=args.combinedlogslen,
@ -427,7 +417,7 @@ def main():
use_term_control=args.ansi, use_term_control=args.ansi,
) )
def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, retries=3, retry_sleep=15, enable_coverage=False, args=None, combined_logs_len=0,failfast=False, runs_ci=False, use_term_control): def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, attempts=1, enable_coverage=False, args=None, combined_logs_len=0,failfast=False, runs_ci=False, use_term_control):
args = args or [] args = args or []
# Warn if dashd is already running # Warn if dashd is already running
@ -482,24 +472,22 @@ def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, retries=3, retry
flags=flags, flags=flags,
timeout_duration=30 * 60 if runs_ci else float('inf'), # in seconds timeout_duration=30 * 60 if runs_ci else float('inf'), # in seconds
use_term_control=use_term_control, use_term_control=use_term_control,
attempts=attempts,
) )
start_time = time.time() start_time = time.time()
test_results = [] test_results = []
max_len_name = len(max(test_list, key=len)) max_len_name = len(max(test_list, key=len))
test_count = len(test_list) test_count = len(test_list)
for i in range(test_count):
def run_test(test_result, testdir, stdout, stderr, attempt, limit): test_result, testdir, stdout, stderr = job_queue.get_next()
test_results.append(test_result)
done_str = "{}/{} - {}{}{}".format(i + 1, test_count, BOLD[1], test_result.name, BOLD[0])
if test_result.status == "Passed": if test_result.status == "Passed":
logging.debug("%s passed, Duration: %s s" % (done_str, test_result.time)) logging.debug("%s passed, Duration: %s s" % (done_str, test_result.time))
return True
elif test_result.status == "Skipped": elif test_result.status == "Skipped":
logging.debug("%s skipped" % (done_str)) logging.debug("%s skipped" % (done_str))
return True
else: else:
if limit > 1:
logging.debug("%s failed at attempt %s of %s, Duration: %s s\n" % (done_str, attempt, limit, test_result.time))
if limit == attempt:
print("%s failed, Duration: %s s\n" % (done_str, test_result.time)) print("%s failed, Duration: %s s\n" % (done_str, test_result.time))
print(BOLD[1] + 'stdout:\n' + BOLD[0] + stdout + '\n') print(BOLD[1] + 'stdout:\n' + BOLD[0] + stdout + '\n')
print(BOLD[1] + 'stderr:\n' + BOLD[0] + stderr + '\n') print(BOLD[1] + 'stderr:\n' + BOLD[0] + stderr + '\n')
@ -514,23 +502,8 @@ def run_tests(*, test_list, src_dir, build_dir, tmpdir, jobs=1, retries=3, retry
combined_logs_args += ['--color'] combined_logs_args += ['--color']
combined_logs, _ = subprocess.Popen(combined_logs_args, universal_newlines=True, stdout=subprocess.PIPE).communicate() combined_logs, _ = subprocess.Popen(combined_logs_args, universal_newlines=True, stdout=subprocess.PIPE).communicate()
print("\n".join(deque(combined_logs.splitlines(), combined_logs_len))) print("\n".join(deque(combined_logs.splitlines(), combined_logs_len)))
else:
time.sleep(retry_sleep)
return False
for i in range(test_count): if failfast:
test_result, testdir, stdout, stderr = job_queue.get_next()
done_str = "{}/{} - {}{}{}".format(i + 1, test_count, BOLD[1], test_result.name, BOLD[0])
retry_limit = retries if test_result.name in NONDETERMINISTIC_SCRIPTS else 1
test_status = False
for itx in range(retry_limit):
test_status = run_test(test_result, testdir, stdout, stderr, itx + 1, retry_limit)
# Only append result if test either passes or fails till retry_limit, pushing only
# the result of the last attempt to test_results.
if test_status or retry_limit == itx + 1:
test_results.append(test_result)
break
if failfast and not test_status:
logging.debug("Early exiting after test failure") logging.debug("Early exiting after test failure")
break break
@ -583,7 +556,7 @@ class TestHandler:
Trigger the test scripts passed in via the list. Trigger the test scripts passed in via the list.
""" """
def __init__(self, *, num_tests_parallel, tests_dir, tmpdir, test_list, flags, timeout_duration, use_term_control): def __init__(self, *, num_tests_parallel, tests_dir, tmpdir, test_list, flags, timeout_duration, use_term_control, attempts):
assert num_tests_parallel >= 1 assert num_tests_parallel >= 1
self.num_jobs = num_tests_parallel self.num_jobs = num_tests_parallel
self.tests_dir = tests_dir self.tests_dir = tests_dir
@ -594,6 +567,7 @@ class TestHandler:
self.num_running = 0 self.num_running = 0
self.jobs = [] self.jobs = []
self.use_term_control = use_term_control self.use_term_control = use_term_control
self.attempts = attempts
def get_next(self): def get_next(self):
while self.num_running < self.num_jobs and self.test_list: while self.num_running < self.num_jobs and self.test_list:
@ -615,7 +589,9 @@ class TestHandler:
stderr=log_stderr), stderr=log_stderr),
testdir, testdir,
log_stdout, log_stdout,
log_stderr)) log_stderr,
portseed,
1)) # attempt
if not self.jobs: if not self.jobs:
raise IndexError('pop from empty list') raise IndexError('pop from empty list')
@ -628,7 +604,7 @@ class TestHandler:
# Return first proc that finishes # Return first proc that finishes
time.sleep(.5) time.sleep(.5)
for job in self.jobs: for job in self.jobs:
(name, start_time, proc, testdir, log_out, log_err) = job (name, start_time, proc, testdir, log_out, log_err, portseed, attempt) = job
if int(time.time() - start_time) > self.timeout_duration: if int(time.time() - start_time) > self.timeout_duration:
# Timeout individual tests if timeout is specified (to stop # Timeout individual tests if timeout is specified (to stop
# tests hanging and not providing useful output). # tests hanging and not providing useful output).
@ -641,6 +617,34 @@ class TestHandler:
status = "Passed" status = "Passed"
elif proc.returncode == TEST_EXIT_SKIPPED: elif proc.returncode == TEST_EXIT_SKIPPED:
status = "Skipped" status = "Skipped"
elif attempt < self.attempts:
# cleanup
if self.use_term_control:
clearline = '\r' + (' ' * dot_count) + '\r'
print(clearline, end='', flush=True)
dot_count = 0
shutil.rmtree(testdir, ignore_errors=True)
self.jobs.remove(job)
print(f"{name} failed at attempt {attempt}/{self.attempts}, Duration: {int(time.time() - start_time)} s")
# start over
portseed_arg = ["--portseed={}".format(portseed)]
log_stdout = tempfile.SpooledTemporaryFile(max_size=2**16)
log_stderr = tempfile.SpooledTemporaryFile(max_size=2**16)
test_argv = name.split()
tmpdir_arg = ["--tmpdir={}".format(testdir)]
self.jobs.append((name,
time.time(),
subprocess.Popen([sys.executable, self.tests_dir + test_argv[0]] + test_argv[1:] + self.flags + portseed_arg + tmpdir_arg,
universal_newlines=True,
stdout=log_stdout,
stderr=log_stderr),
testdir,
log_stdout,
log_stderr,
portseed,
attempt + 1)) # attempt
# no results for now, move to the next job
continue
else: else:
status = "Failed" status = "Failed"
self.num_running -= 1 self.num_running -= 1