mirror of
https://github.com/dashpay/dash.git
synced 2024-12-24 19:42:46 +01:00
merge bitcoin#24932: Convert lint-locale-dependence.sh to Python
This commit is contained in:
parent
f745b7f7ef
commit
852f55e23c
264
test/lint/lint-locale-dependence.py
Executable file
264
test/lint/lint-locale-dependence.py
Executable file
@ -0,0 +1,264 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2018-2022 The Bitcoin Core developers
|
||||
# Distributed under the MIT software license, see the accompanying
|
||||
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
#
|
||||
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
|
||||
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
|
||||
# whereas no such call is made in bitcoind.
|
||||
#
|
||||
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
|
||||
# specified by the user's LC_ALL (or LC_*) environment variable as the new
|
||||
# C locale.
|
||||
#
|
||||
# In contrast, bitcoind does not opt in to localization -- no call to
|
||||
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
|
||||
# thus ignored.
|
||||
#
|
||||
# This results in situations where bitcoind is guaranteed to be running
|
||||
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
|
||||
# depending on the user's environment variables.
|
||||
#
|
||||
# An example: Assuming the environment variable LC_ALL=de_DE then the
|
||||
# call std::to_string(1.23) will return "1.230000" in bitcoind but
|
||||
# "1,230000" in bitcoin-qt.
|
||||
#
|
||||
# From the Qt documentation:
|
||||
# "On Unix/Linux Qt is configured to use the system locale settings by default.
|
||||
# This can cause a conflict when using POSIX functions, for instance, when
|
||||
# converting between data types such as floats and strings, since the notation
|
||||
# may differ between locales. To get around this problem, call the POSIX function
|
||||
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
|
||||
# or QCoreApplication to reset the locale that is used for number formatting to
|
||||
# "C"-locale."
|
||||
#
|
||||
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
|
||||
# https://stackoverflow.com/a/34878283 for more details.
|
||||
#
|
||||
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale
|
||||
# independent ToIntegral<T>(...).
|
||||
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
from subprocess import check_output, CalledProcessError
|
||||
|
||||
|
||||
KNOWN_VIOLATIONS = [
|
||||
"src/bitcoin-tx.cpp.*stoul",
|
||||
"src/dbwrapper.cpp:.*vsnprintf",
|
||||
"src/test/dbwrapper_tests.cpp:.*snprintf",
|
||||
"src/test/fuzz/locale.cpp",
|
||||
"src/test/fuzz/string.cpp",
|
||||
"src/util/strencodings.cpp:.*strtoll",
|
||||
"src/util/system.cpp:.*fprintf"
|
||||
]
|
||||
|
||||
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
|
||||
"src/crypto/ctaes/",
|
||||
"src/leveldb/",
|
||||
"src/secp256k1/",
|
||||
"src/minisketch/",
|
||||
"src/tinyformat.h",
|
||||
"src/univalue/",
|
||||
"src/dashbls/",
|
||||
"src/immer/"
|
||||
]
|
||||
|
||||
LOCALE_DEPENDENT_FUNCTIONS = [
|
||||
"alphasort", # LC_COLLATE (via strcoll)
|
||||
"asctime", # LC_TIME (directly)
|
||||
"asprintf", # (via vasprintf)
|
||||
"atof", # LC_NUMERIC (via strtod)
|
||||
"atoi", # LC_NUMERIC (via strtol)
|
||||
"atol", # LC_NUMERIC (via strtol)
|
||||
"atoll", # (via strtoll)
|
||||
"atoq",
|
||||
"btowc", # LC_CTYPE (directly)
|
||||
"ctime", # (via asctime or localtime)
|
||||
"dprintf", # (via vdprintf)
|
||||
"fgetwc",
|
||||
"fgetws",
|
||||
"fold_case", # boost::locale::fold_case
|
||||
"fprintf", # (via vfprintf)
|
||||
"fputwc",
|
||||
"fputws",
|
||||
"fscanf", # (via __vfscanf)
|
||||
"fwprintf", # (via __vfwprintf)
|
||||
"getdate", # via __getdate_r => isspace // __localtime_r
|
||||
"getwc",
|
||||
"getwchar",
|
||||
"is_digit", # boost::algorithm::is_digit
|
||||
"is_space", # boost::algorithm::is_space
|
||||
"isalnum", # LC_CTYPE
|
||||
"isalpha", # LC_CTYPE
|
||||
"isblank", # LC_CTYPE
|
||||
"iscntrl", # LC_CTYPE
|
||||
"isctype", # LC_CTYPE
|
||||
"isdigit", # LC_CTYPE
|
||||
"isgraph", # LC_CTYPE
|
||||
"islower", # LC_CTYPE
|
||||
"isprint", # LC_CTYPE
|
||||
"ispunct", # LC_CTYPE
|
||||
"isspace", # LC_CTYPE
|
||||
"isupper", # LC_CTYPE
|
||||
"iswalnum", # LC_CTYPE
|
||||
"iswalpha", # LC_CTYPE
|
||||
"iswblank", # LC_CTYPE
|
||||
"iswcntrl", # LC_CTYPE
|
||||
"iswctype", # LC_CTYPE
|
||||
"iswdigit", # LC_CTYPE
|
||||
"iswgraph", # LC_CTYPE
|
||||
"iswlower", # LC_CTYPE
|
||||
"iswprint", # LC_CTYPE
|
||||
"iswpunct", # LC_CTYPE
|
||||
"iswspace", # LC_CTYPE
|
||||
"iswupper", # LC_CTYPE
|
||||
"iswxdigit", # LC_CTYPE
|
||||
"isxdigit", # LC_CTYPE
|
||||
"localeconv", # LC_NUMERIC + LC_MONETARY
|
||||
"mblen", # LC_CTYPE
|
||||
"mbrlen",
|
||||
"mbrtowc",
|
||||
"mbsinit",
|
||||
"mbsnrtowcs",
|
||||
"mbsrtowcs",
|
||||
"mbstowcs", # LC_CTYPE
|
||||
"mbtowc", # LC_CTYPE
|
||||
"mktime",
|
||||
"normalize", # boost::locale::normalize
|
||||
"printf", # LC_NUMERIC
|
||||
"putwc",
|
||||
"putwchar",
|
||||
"scanf", # LC_NUMERIC
|
||||
"setlocale",
|
||||
"snprintf",
|
||||
"sprintf",
|
||||
"sscanf",
|
||||
"std::locale::global",
|
||||
"std::to_string",
|
||||
"stod",
|
||||
"stof",
|
||||
"stoi",
|
||||
"stol",
|
||||
"stold",
|
||||
"stoll",
|
||||
"stoul",
|
||||
"stoull",
|
||||
"strcasecmp",
|
||||
"strcasestr",
|
||||
"strcoll", # LC_COLLATE
|
||||
#"strerror",
|
||||
"strfmon",
|
||||
"strftime", # LC_TIME
|
||||
"strncasecmp",
|
||||
"strptime",
|
||||
"strtod", # LC_NUMERIC
|
||||
"strtof",
|
||||
"strtoimax",
|
||||
"strtol", # LC_NUMERIC
|
||||
"strtold",
|
||||
"strtoll",
|
||||
"strtoq",
|
||||
"strtoul", # LC_NUMERIC
|
||||
"strtoull",
|
||||
"strtoumax",
|
||||
"strtouq",
|
||||
"strxfrm", # LC_COLLATE
|
||||
"swprintf",
|
||||
"to_lower", # boost::locale::to_lower
|
||||
"to_title", # boost::locale::to_title
|
||||
"to_upper", # boost::locale::to_upper
|
||||
"tolower", # LC_CTYPE
|
||||
"toupper", # LC_CTYPE
|
||||
"towctrans",
|
||||
"towlower", # LC_CTYPE
|
||||
"towupper", # LC_CTYPE
|
||||
"trim", # boost::algorithm::trim
|
||||
"trim_left", # boost::algorithm::trim_left
|
||||
"trim_right", # boost::algorithm::trim_right
|
||||
"ungetwc",
|
||||
"vasprintf",
|
||||
"vdprintf",
|
||||
"versionsort",
|
||||
"vfprintf",
|
||||
"vfscanf",
|
||||
"vfwprintf",
|
||||
"vprintf",
|
||||
"vscanf",
|
||||
"vsnprintf",
|
||||
"vsprintf",
|
||||
"vsscanf",
|
||||
"vswprintf",
|
||||
"vwprintf",
|
||||
"wcrtomb",
|
||||
"wcscasecmp",
|
||||
"wcscoll", # LC_COLLATE
|
||||
"wcsftime", # LC_TIME
|
||||
"wcsncasecmp",
|
||||
"wcsnrtombs",
|
||||
"wcsrtombs",
|
||||
"wcstod", # LC_NUMERIC
|
||||
"wcstof",
|
||||
"wcstoimax",
|
||||
"wcstol", # LC_NUMERIC
|
||||
"wcstold",
|
||||
"wcstoll",
|
||||
"wcstombs", # LC_CTYPE
|
||||
"wcstoul", # LC_NUMERIC
|
||||
"wcstoull",
|
||||
"wcstoumax",
|
||||
"wcswidth",
|
||||
"wcsxfrm", # LC_COLLATE
|
||||
"wctob",
|
||||
"wctomb", # LC_CTYPE
|
||||
"wctrans",
|
||||
"wctype",
|
||||
"wcwidth",
|
||||
"wprintf"
|
||||
]
|
||||
|
||||
|
||||
def find_locale_dependent_function_uses():
    """Return the ``git grep`` output lines that mention a locale dependent function.

    A single ``git grep -E`` is run over all tracked ``*.cpp`` and ``*.h``
    files, skipping the paths in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. The
    pattern matches any name from LOCALE_DEPENDENT_FUNCTIONS (optionally
    followed by ``_r`` or ``_s``) that is not embedded in a longer identifier.

    Returns:
        A list of output lines as printed by ``git grep``; the list is empty
        when ``git grep`` exits with status 1.

    Raises:
        CalledProcessError: if ``git grep`` exits with any non-zero status
            other than 1. This includes negative return codes, which is how
            ``subprocess`` reports a child terminated by a signal.
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    # ":(exclude)<path>" pathspecs keep external/vendored code out of the search.
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "-E", "[^a-zA-Z0-9_\\`'\"<>](" + regexp_locale_dependent_functions + "(_r|_s)?)[^a-zA-Z0-9_\\`'\"<>]", "--", "*.cpp", "*.h"] + exclude_args

    try:
        return check_output(git_grep_command, universal_newlines=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is treated as "nothing matched". Everything else must
        # propagate: a plain "> 1" test would swallow negative return codes
        # (child killed by a signal) and make the lint pass on no data.
        if e.returncode != 1:
            raise
        return []
|
||||
|
||||
|
||||
def main():
    """Report uses of locale dependent functions and exit with a lint status.

    For each name in LOCALE_DEPENDENT_FUNCTIONS, the lines returned by
    find_locale_dependent_function_uses() are kept when they:

    * mention the function (optionally suffixed ``_r``/``_s``) as a
      stand-alone identifier,
    * do not start, right after the ``<file>:`` prefix, with a comment marker
      or a double quote followed by the function name, and
    * do not match any entry of KNOWN_VIOLATIONS.

    Remaining lines are printed grouped by function. The process exits with
    status 1 if anything was reported, 0 otherwise.
    """
    exit_code = 0

    # An empty KNOWN_VIOLATIONS list must disable the filter: joining it gives
    # "", and an empty pattern matches every line, which would hide all findings.
    regexp_ignore_known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile the per-function patterns once instead of once per line.
        regexp_use = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        regexp_comment_or_string = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in git_grep_output
                   if regexp_use.search(line)
                   and not regexp_comment_or_string.search(line)
                   and not (regexp_ignore_known_violations and regexp_ignore_known_violations.search(line))]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,246 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright (c) 2018-2020 The Bitcoin Core developers
|
||||
# Distributed under the MIT software license, see the accompanying
|
||||
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale
|
||||
# independent ToIntegral<T>(...).
|
||||
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
|
||||
|
||||
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
|
||||
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
|
||||
# whereas no such call is made in bitcoind.
|
||||
#
|
||||
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
|
||||
# specified by the user's LC_ALL (or LC_*) environment variable as the new
|
||||
# C locale.
|
||||
#
|
||||
# In contrast, bitcoind does not opt in to localization -- no call to
|
||||
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
|
||||
# thus ignored.
|
||||
#
|
||||
# This results in situations where bitcoind is guaranteed to be running
|
||||
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
|
||||
# depending on the user's environment variables.
|
||||
#
|
||||
# An example: Assuming the environment variable LC_ALL=de_DE then the
|
||||
# call std::to_string(1.23) will return "1.230000" in bitcoind but
|
||||
# "1,230000" in bitcoin-qt.
|
||||
#
|
||||
# From the Qt documentation:
|
||||
# "On Unix/Linux Qt is configured to use the system locale settings by default.
|
||||
# This can cause a conflict when using POSIX functions, for instance, when
|
||||
# converting between data types such as floats and strings, since the notation
|
||||
# may differ between locales. To get around this problem, call the POSIX function
|
||||
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
|
||||
# or QCoreApplication to reset the locale that is used for number formatting to
|
||||
# "C"-locale."
|
||||
#
|
||||
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
|
||||
# https://stackoverflow.com/a/34878283 for more details.
|
||||
|
||||
KNOWN_VIOLATIONS=(
|
||||
"src/bitcoin-tx.cpp.*stoul"
|
||||
"src/dbwrapper.cpp:.*vsnprintf"
|
||||
"src/test/dbwrapper_tests.cpp:.*snprintf"
|
||||
"src/test/fuzz/locale.cpp"
|
||||
"src/test/fuzz/string.cpp"
|
||||
"src/util/strencodings.cpp:.*strtoll"
|
||||
"src/util/system.cpp:.*fprintf"
|
||||
)
|
||||
|
||||
REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(dashbls/|immer/|crypto/ctaes/|leveldb/|secp256k1/|minisketch/|tinyformat.h|univalue/)"
|
||||
|
||||
LOCALE_DEPENDENT_FUNCTIONS=(
|
||||
alphasort # LC_COLLATE (via strcoll)
|
||||
asctime # LC_TIME (directly)
|
||||
asprintf # (via vasprintf)
|
||||
atof # LC_NUMERIC (via strtod)
|
||||
atoi # LC_NUMERIC (via strtol)
|
||||
atol # LC_NUMERIC (via strtol)
|
||||
atoll # (via strtoll)
|
||||
atoq
|
||||
btowc # LC_CTYPE (directly)
|
||||
ctime # (via asctime or localtime)
|
||||
dprintf # (via vdprintf)
|
||||
fgetwc
|
||||
fgetws
|
||||
fold_case # boost::locale::fold_case
|
||||
fprintf # (via vfprintf)
|
||||
fputwc
|
||||
fputws
|
||||
fscanf # (via __vfscanf)
|
||||
fwprintf # (via __vfwprintf)
|
||||
getdate # via __getdate_r => isspace // __localtime_r
|
||||
getwc
|
||||
getwchar
|
||||
is_digit # boost::algorithm::is_digit
|
||||
is_space # boost::algorithm::is_space
|
||||
isalnum # LC_CTYPE
|
||||
isalpha # LC_CTYPE
|
||||
isblank # LC_CTYPE
|
||||
iscntrl # LC_CTYPE
|
||||
isctype # LC_CTYPE
|
||||
isdigit # LC_CTYPE
|
||||
isgraph # LC_CTYPE
|
||||
islower # LC_CTYPE
|
||||
isprint # LC_CTYPE
|
||||
ispunct # LC_CTYPE
|
||||
isspace # LC_CTYPE
|
||||
isupper # LC_CTYPE
|
||||
iswalnum # LC_CTYPE
|
||||
iswalpha # LC_CTYPE
|
||||
iswblank # LC_CTYPE
|
||||
iswcntrl # LC_CTYPE
|
||||
iswctype # LC_CTYPE
|
||||
iswdigit # LC_CTYPE
|
||||
iswgraph # LC_CTYPE
|
||||
iswlower # LC_CTYPE
|
||||
iswprint # LC_CTYPE
|
||||
iswpunct # LC_CTYPE
|
||||
iswspace # LC_CTYPE
|
||||
iswupper # LC_CTYPE
|
||||
iswxdigit # LC_CTYPE
|
||||
isxdigit # LC_CTYPE
|
||||
localeconv # LC_NUMERIC + LC_MONETARY
|
||||
mblen # LC_CTYPE
|
||||
mbrlen
|
||||
mbrtowc
|
||||
mbsinit
|
||||
mbsnrtowcs
|
||||
mbsrtowcs
|
||||
mbstowcs # LC_CTYPE
|
||||
mbtowc # LC_CTYPE
|
||||
mktime
|
||||
normalize # boost::locale::normalize
|
||||
printf # LC_NUMERIC
|
||||
putwc
|
||||
putwchar
|
||||
scanf # LC_NUMERIC
|
||||
setlocale
|
||||
snprintf
|
||||
sprintf
|
||||
sscanf
|
||||
std::locale::global
|
||||
std::to_string
|
||||
stod
|
||||
stof
|
||||
stoi
|
||||
stol
|
||||
stold
|
||||
stoll
|
||||
stoul
|
||||
stoull
|
||||
strcasecmp
|
||||
strcasestr
|
||||
strcoll # LC_COLLATE
|
||||
# strerror
|
||||
strfmon
|
||||
strftime # LC_TIME
|
||||
strncasecmp
|
||||
strptime
|
||||
strtod # LC_NUMERIC
|
||||
strtof
|
||||
strtoimax
|
||||
strtol # LC_NUMERIC
|
||||
strtold
|
||||
strtoll
|
||||
strtoq
|
||||
strtoul # LC_NUMERIC
|
||||
strtoull
|
||||
strtoumax
|
||||
strtouq
|
||||
strxfrm # LC_COLLATE
|
||||
swprintf
|
||||
to_lower # boost::locale::to_lower
|
||||
to_title # boost::locale::to_title
|
||||
to_upper # boost::locale::to_upper
|
||||
tolower # LC_CTYPE
|
||||
toupper # LC_CTYPE
|
||||
towctrans
|
||||
towlower # LC_CTYPE
|
||||
towupper # LC_CTYPE
|
||||
trim # boost::algorithm::trim
|
||||
trim_left # boost::algorithm::trim_left
|
||||
trim_right # boost::algorithm::trim_right
|
||||
ungetwc
|
||||
vasprintf
|
||||
vdprintf
|
||||
versionsort
|
||||
vfprintf
|
||||
vfscanf
|
||||
vfwprintf
|
||||
vprintf
|
||||
vscanf
|
||||
vsnprintf
|
||||
vsprintf
|
||||
vsscanf
|
||||
vswprintf
|
||||
vwprintf
|
||||
wcrtomb
|
||||
wcscasecmp
|
||||
wcscoll # LC_COLLATE
|
||||
wcsftime # LC_TIME
|
||||
wcsncasecmp
|
||||
wcsnrtombs
|
||||
wcsrtombs
|
||||
wcstod # LC_NUMERIC
|
||||
wcstof
|
||||
wcstoimax
|
||||
wcstol # LC_NUMERIC
|
||||
wcstold
|
||||
wcstoll
|
||||
wcstombs # LC_CTYPE
|
||||
wcstoul # LC_NUMERIC
|
||||
wcstoull
|
||||
wcstoumax
|
||||
wcswidth
|
||||
wcsxfrm # LC_COLLATE
|
||||
wctob
|
||||
wctomb # LC_CTYPE
|
||||
wctrans
|
||||
wctype
|
||||
wcwidth
|
||||
wprintf
|
||||
)
|
||||
|
||||
function join_array {
|
||||
local IFS="$1"
|
||||
shift
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
REGEXP_IGNORE_KNOWN_VIOLATIONS=$(join_array "|" "${KNOWN_VIOLATIONS[@]}")
|
||||
|
||||
# Invoke "git grep" only once in order to minimize run-time
|
||||
REGEXP_LOCALE_DEPENDENT_FUNCTIONS=$(join_array "|" "${LOCALE_DEPENDENT_FUNCTIONS[@]}")
|
||||
GIT_GREP_OUTPUT=$(git grep -E "[^a-zA-Z0-9_\`'\"<>](${REGEXP_LOCALE_DEPENDENT_FUNCTIONS}(_r|_s)?)[^a-zA-Z0-9_\`'\"<>]" -- "*.cpp" "*.h")
|
||||
|
||||
EXIT_CODE=0
|
||||
for LOCALE_DEPENDENT_FUNCTION in "${LOCALE_DEPENDENT_FUNCTIONS[@]}"; do
|
||||
MATCHES=$(grep -E "[^a-zA-Z0-9_\`'\"<>]${LOCALE_DEPENDENT_FUNCTION}(_r|_s)?[^a-zA-Z0-9_\`'\"<>]" <<< "${GIT_GREP_OUTPUT}" | \
|
||||
grep -vE "\.(c|cpp|h):\s*(//|\*|/\*|\").*${LOCALE_DEPENDENT_FUNCTION}")
|
||||
if [[ ${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES} != "" ]]; then
|
||||
MATCHES=$(grep -vE "${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES}" <<< "${MATCHES}")
|
||||
fi
|
||||
if [[ ${REGEXP_IGNORE_KNOWN_VIOLATIONS} != "" ]]; then
|
||||
MATCHES=$(grep -vE "${REGEXP_IGNORE_KNOWN_VIOLATIONS}" <<< "${MATCHES}")
|
||||
fi
|
||||
if [[ ${MATCHES} != "" ]]; then
|
||||
echo "The locale dependent function ${LOCALE_DEPENDENT_FUNCTION}(...) appears to be used:"
|
||||
echo "${MATCHES}"
|
||||
echo
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
done
|
||||
if [[ ${EXIT_CODE} != 0 ]]; then
|
||||
echo "Unnecessary locale dependence can cause bugs that are very"
|
||||
echo "tricky to isolate and fix. Please avoid using locale dependent"
|
||||
echo "functions if possible."
|
||||
echo
|
||||
echo "Advice not applicable in this specific case? Add an exception"
|
||||
echo "by updating the ignore list in $0"
|
||||
fi
|
||||
exit ${EXIT_CODE}
|
Loading…
Reference in New Issue
Block a user