Merge bitcoin/bitcoin#25015: test: Use permissions from git in lint-files.py

908fb7e2ec37fe68675d38dbfee4df9f861bb2b5 test: Use permissions from git in `lint-files.py` (laanwj)
48d2e80a7479a44b0ab09e87542c8cb7a8f72223 test: Don't use shell=True in `lint-files.py` (laanwj)

Pull request description:

  Improvements to the `lint-files.py` script:

  - Avoid use of `shell=True`.
  - Check the permissions in git's metadata instead of in the filesystem. This stops the umask or filesystem from interfering. It's also more efficient as it only needs a single call to `git ls-files`.

  (what triggered this change was `File "..." contains a shebang line, but has the file permission 775 instead of the expected executable permission 755.` errors running the script locally).

ACKs for top commit:
  vincenzopalazzo:
    re-tACK 908fb7e2ec

Tree-SHA512: 2eaf868c55a9c3508b12658a5b3ac429963fd0551e645332d0ac54be56fefccee95115e4667386df24b46b545593cb0d0bf8c6cecab73f9cb19d37ddf704c614
This commit is contained in:
MacroFake 2022-05-23 18:59:21 +02:00 committed by Konstantin Akimov
parent f226e8dc1f
commit c8c58a1810
No known key found for this signature in database
GPG Key ID: 2176C4A5D01EA524

View File

@ -11,19 +11,20 @@ import os
import re import re
import sys import sys
from subprocess import check_output from subprocess import check_output
from typing import Optional, NoReturn from typing import Dict, Optional, NoReturn
CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"] CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"]
CMD_ALL_FILES = "git ls-files -z --full-name" CMD_ALL_FILES = ["git", "ls-files", "-z", "--full-name", "--stage"]
CMD_SOURCE_FILES = 'git ls-files -z --full-name -- "*.[cC][pP][pP]" "*.[hH]" "*.[pP][yY]" "*.[sS][hH]"' CMD_SHEBANG_FILES = ["git", "grep", "--full-name", "--line-number", "-I", "^#!"]
CMD_SHEBANG_FILES = "git grep --full-name --line-number -I '^#!'"
ALL_SOURCE_FILENAMES_REGEXP = r"^.*\.(cpp|h|py|sh)$"
ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$" ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$" ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = ( ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = (
"^src/(secp256k1/|univalue/|test/fuzz/FuzzedDataProvider.h)" "^src/(secp256k1/|univalue/|test/fuzz/FuzzedDataProvider.h)"
) )
ALLOWED_PERMISSION_NON_EXECUTABLES = 644 ALLOWED_PERMISSION_NON_EXECUTABLES = 0o644
ALLOWED_PERMISSION_EXECUTABLES = 755 ALLOWED_PERMISSION_EXECUTABLES = 0o755
ALLOWED_EXECUTABLE_SHEBANG = { ALLOWED_EXECUTABLE_SHEBANG = {
"py": [b"#!/usr/bin/env python3"], "py": [b"#!/usr/bin/env python3"],
"sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"], "sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"],
@ -31,8 +32,15 @@ ALLOWED_EXECUTABLE_SHEBANG = {
class FileMeta(object): class FileMeta(object):
def __init__(self, file_path: str): def __init__(self, file_spec: str):
self.file_path = file_path '''Parse a `git ls files --stage` output line.'''
# 100755 5a150d5f8031fcd75e80a4dd9843afa33655f579 0 ci/test/00_setup_env.sh
meta, self.file_path = file_spec.split('\t', 2)
meta = meta.split()
# The octal file permission of the file. Internally, git only
# keeps an 'executable' bit, so this will always be 0o644 or 0o755.
self.permissions = int(meta[0], 8) & 0o7777
# We don't currently care about the other fields
@property @property
def extension(self) -> Optional[str]: def extension(self) -> Optional[str]:
@ -60,20 +68,24 @@ class FileMeta(object):
except IndexError: except IndexError:
return None return None
@property
def permissions(self) -> int:
"""
Returns the octal file permission of the file
"""
return int(oct(os.stat(self.file_path).st_mode)[-3:])
def get_git_file_metadata() -> Dict[str, FileMeta]:
'''
Return a dictionary mapping the name of all files in the repository to git tree metadata.
'''
files_raw = check_output(CMD_ALL_FILES).decode("utf8").rstrip("\0").split("\0")
files = {}
for file_spec in files_raw:
meta = FileMeta(file_spec)
files[meta.file_path] = meta
return files
def check_all_filenames() -> int: def check_all_filenames(files) -> int:
""" """
Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames. alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.
""" """
filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").rstrip("\0").split("\0") filenames = files.keys()
filename_regex = re.compile(ALLOWED_FILENAME_REGEXP) filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
failed_tests = 0 failed_tests = 0
for filename in filenames: for filename in filenames:
@ -85,14 +97,14 @@ def check_all_filenames() -> int:
return failed_tests return failed_tests
def check_source_filenames() -> int: def check_source_filenames(files) -> int:
""" """
Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase
alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames. alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames.
Additionally there is an exception regexp for directories or files which are excepted from matching this regexp. Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.
""" """
filenames = check_output(CMD_SOURCE_FILES, shell=True).decode("utf8").rstrip("\0").split("\0") filenames = [filename for filename in files.keys() if re.match(ALL_SOURCE_FILENAMES_REGEXP, filename, re.IGNORECASE)]
filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP) filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP) filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
failed_tests = 0 failed_tests = 0
@ -105,16 +117,14 @@ def check_source_filenames() -> int:
return failed_tests return failed_tests
def check_all_file_permissions() -> int: def check_all_file_permissions(files) -> int:
""" """
Checks all files in the repository match an allowed executable or non-executable file permission octal. Checks all files in the repository match an allowed executable or non-executable file permission octal.
Additionally checks that for executable files, the file contains a shebang line Additionally checks that for executable files, the file contains a shebang line
""" """
filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").rstrip("\0").split("\0")
failed_tests = 0 failed_tests = 0
for filename in filenames: for filename, file_meta in files.items():
file_meta = FileMeta(filename)
if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES: if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
with open(filename, "rb") as f: with open(filename, "rb") as f:
shebang = f.readline().rstrip(b"\n") shebang = f.readline().rstrip(b"\n")
@ -122,7 +132,7 @@ def check_all_file_permissions() -> int:
# For any file with executable permissions the first line must contain a shebang # For any file with executable permissions the first line must contain a shebang
if not shebang.startswith(b"#!"): if not shebang.startswith(b"#!"):
print( print(
f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES} {filename}" to make it non-executable.""" f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES:03o} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" to make it non-executable."""
) )
failed_tests += 1 failed_tests += 1
@ -145,18 +155,18 @@ def check_all_file_permissions() -> int:
continue continue
else: else:
print( print(
f"""File "{filename}" has unexpected permission {file_meta.permissions}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES} {filename}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES} {filename}" (if executable).""" f"""File "{filename}" has unexpected permission {file_meta.permissions:03o}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (if executable)."""
) )
failed_tests += 1 failed_tests += 1
return failed_tests return failed_tests
def check_shebang_file_permissions() -> int: def check_shebang_file_permissions(files_meta) -> int:
""" """
Checks every file that contains a shebang line to ensure it has an executable permission Checks every file that contains a shebang line to ensure it has an executable permission
""" """
filenames = check_output(CMD_SHEBANG_FILES, shell=True).decode("utf8").strip().split("\n") filenames = check_output(CMD_SHEBANG_FILES).decode("utf8").strip().split("\n")
# The git grep command we use returns files which contain a shebang on any line within the file # The git grep command we use returns files which contain a shebang on any line within the file
# so we need to filter the list to only files with the shebang on the first line # so we need to filter the list to only files with the shebang on the first line
@ -164,7 +174,7 @@ def check_shebang_file_permissions() -> int:
failed_tests = 0 failed_tests = 0
for filename in filenames: for filename in filenames:
file_meta = FileMeta(filename) file_meta = files_meta[filename]
if file_meta.permissions != ALLOWED_PERMISSION_EXECUTABLES: if file_meta.permissions != ALLOWED_PERMISSION_EXECUTABLES:
# These file types are typically expected to be sourced and not executed directly # These file types are typically expected to be sourced and not executed directly
if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]: if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]:
@ -178,7 +188,7 @@ def check_shebang_file_permissions() -> int:
continue continue
print( print(
f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES} {filename}" (or remove the shebang line).""" f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions:03o} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES:03o}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (or remove the shebang line)."""
) )
failed_tests += 1 failed_tests += 1
return failed_tests return failed_tests
@ -187,11 +197,14 @@ def check_shebang_file_permissions() -> int:
def main() -> NoReturn: def main() -> NoReturn:
root_dir = check_output(CMD_TOP_LEVEL).decode("utf8").strip() root_dir = check_output(CMD_TOP_LEVEL).decode("utf8").strip()
os.chdir(root_dir) os.chdir(root_dir)
files = get_git_file_metadata()
failed_tests = 0 failed_tests = 0
failed_tests += check_all_filenames() failed_tests += check_all_filenames(files)
failed_tests += check_source_filenames() failed_tests += check_source_filenames(files)
failed_tests += check_all_file_permissions() failed_tests += check_all_file_permissions(files)
failed_tests += check_shebang_file_permissions() failed_tests += check_shebang_file_permissions(files)
if failed_tests: if failed_tests:
print( print(