diff --git a/test/lint/lint-python-utf8-encoding.py b/test/lint/lint-python-utf8-encoding.py new file mode 100755 index 0000000000..4f8b307c04 --- /dev/null +++ b/test/lint/lint-python-utf8-encoding.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2018-2022 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to +# avoid potential issues on the BSDs where the locale is not always set. + +import sys +import re + +from subprocess import check_output, CalledProcessError + +EXCLUDED_DIRS = ["src/crc32c/", + "src/secp256k1/"] + + +def get_exclude_args(): + return [":(exclude)" + dir for dir in EXCLUDED_DIRS] + + +def check_fileopens(): + fileopens = list() + + try: + fileopens = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines() + except CalledProcessError as e: + if e.returncode > 1: + raise e + + filtered_fileopens = [fileopen for fileopen in fileopens if not re.search(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]", fileopen)] + + return filtered_fileopens + + +def check_checked_outputs(): + checked_outputs = list() + + try: + checked_outputs = check_output(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines() + except CalledProcessError as e: + if e.returncode > 1: + raise e + + filtered_checked_outputs = [checked_output for checked_output in checked_outputs if re.search(r"universal_newlines=True", checked_output) and not re.search(r"encoding=.(ascii|utf8|utf-8).", checked_output)] + + return filtered_checked_outputs + + +def main(): + exit_code = 0 + + nonexplicit_utf8_fileopens = check_fileopens() + if nonexplicit_utf8_fileopens: + print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n") + for fileopen in nonexplicit_utf8_fileopens: + print(fileopen) + exit_code = 1 + + nonexplicit_utf8_checked_outputs = check_checked_outputs() + if nonexplicit_utf8_checked_outputs: + if nonexplicit_utf8_fileopens: + print("\n") + print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n") + for checked_output in nonexplicit_utf8_checked_outputs: + print(checked_output) + exit_code = 1 + + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/test/lint/lint-python-utf8-encoding.sh b/test/lint/lint-python-utf8-encoding.sh deleted file mode 100755 index 83c34bef7f..0000000000 --- a/test/lint/lint-python-utf8-encoding.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2018-2020 The Bitcoin Core developers -# Distributed under the MIT software license, see the accompanying -# file COPYING or http://www.opensource.org/licenses/mit-license.php. -# -# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to -# avoid potential issues on the BSDs where the locale is not always set. - -export LC_ALL=C -EXIT_CODE=0 -OUTPUT=$(git grep " open(" -- "*.py" ":(exclude)src/crc32c/" ":(exclude)src/secp256k1/" | grep -vE "encoding=.(ascii|utf8|utf-8)." | grep -vE "open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]") -if [[ ${OUTPUT} != "" ]]; then - echo "Python's open(...) seems to be used to open text files without explicitly" - echo "specifying encoding=\"utf8\":" - echo - echo "${OUTPUT}" - EXIT_CODE=1 -fi -OUTPUT=$(git grep "check_output(" -- "*.py" ":(exclude)src/crc32c/" ":(exclude)src/secp256k1/" | grep "universal_newlines=True" | grep -vE "encoding=.(ascii|utf8|utf-8).") -if [[ ${OUTPUT} != "" ]]; then - echo "Python's check_output(...) seems to be used to get program outputs without explicitly" - echo "specifying encoding=\"utf8\":" - echo - echo "${OUTPUT}" - EXIT_CODE=1 -fi -exit ${EXIT_CODE}