The script `check_source/static_check_size_comments.py` can be run directly or called via the convenience target "make check_size_comments". This adds a utility module: `line_number_utils`, which implements a version of `re.finditer` that includes line numbers & ranges.
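
A minimal sketch of what `line_number_utils.finditer_with_line_numbers_and_bounds` could look like, inferred from its call in `validate_sizes` below. The yielded tuple shape matches that call site; the implementation itself is an assumption, not the actual module:

import re
from collections.abc import Iterator


def finditer_with_line_numbers_and_bounds(
        pattern: re.Pattern[str],
        text: str,
) -> Iterator[tuple[re.Match[str], int, tuple[int, int]]]:
    # Yield `(match, line_index, (line_beg, line_end))` for every match, where
    # `line_index` is zero based & `line_beg`/`line_end` are the character
    # offsets bounding the line on which the match starts.
    for m in pattern.finditer(text):
        line_index = text.count("\n", 0, m.start())
        line_beg = text.rfind("\n", 0, m.start()) + 1
        line_end = text.find("\n", m.start())
        if line_end == -1:
            line_end = len(text)
        yield m, line_index, (line_beg, line_end)
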
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

r"""
Validates sizes in C/C++ sources written as: ``type name[/*MAX_NAME*/ 64]``
where ``MAX_NAME`` is expected to be a define equal to 64, otherwise a warning is reported.
"""

__all__ = (
    "main",
)

import os
import sys
import re

THIS_DIR = os.path.dirname(__file__)
BASE_DIR = os.path.normpath(os.path.abspath(os.path.normpath(os.path.join(THIS_DIR, "..", ".."))))
sys.path.append(os.path.join(THIS_DIR, "..", "utils_maintenance", "modules"))

from batch_edit_text import run
import line_number_utils


# -----------------------------------------------------------------------------
# Utilities


# -----------------------------------------------------------------------------
# Local Settings

# TODO, move to config file
SOURCE_DIRS = (
    "source",
)

SOURCE_EXT = (
    # C/C++
    ".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",
    # Objective C
    ".m", ".mm",
    # GLSL
    ".glsl",
)

# Mainly useful for development to check extraction & validation are working.
SHOW_SUCCESS = True


# -----------------------------------------------------------------------------
# Globals

# Map defines to a list of (filename-split, value) pairs.
global_defines: dict[
    # The define ID.
    str,
    # Value(s), in case it's defined in multiple files.
    list[
        tuple[
            # The `BASE_DIR` relative path (split by `os.sep`).
            tuple[str, ...],
            # The value of the define,
            # a literal string with comments stripped out.
            str,
        ],
    ],
] = {}
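# For illustration, after the extraction pass this may hold entries such as
# (the path & value here are hypothetical):
# `{"MAX_NAME": [(("source", "blender", "makesdna", "DNA_ID.h"), "64")]}`.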


REGEX_ID_LITERAL = "[A-Za-z_][A-Za-z_0-9]*"

# Detect:
# `[/*ID*/ 64]`.
# `[/*ID - 2*/ 62]`.
REGEX_SIZE_COMMENT_IN_ARRAY = re.compile(r"\[/\*([^\]]+)\*/\s*(\d+)\]")
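# For example, against the (hypothetical) declaration `char name[/*MAX_NAME*/ 64];`
# the pattern matches `[/*MAX_NAME*/ 64]` with groups `("MAX_NAME", "64")`.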
# Detect: `#define ID 64`
REGEX_DEFINE_C_LIKE = re.compile(r"^\s*#\s*define\s+(" + REGEX_ID_LITERAL + r")[ \t]+([^\n]+)", re.MULTILINE)
# Detect:
# `ID = 64,`
# `ID = 64`
REGEX_ENUM_C_LIKE = re.compile(r"^\s*(" + REGEX_ID_LITERAL + r")\s=\s([^,\n]+)", re.MULTILINE)
# Detect ID's.
REGEX_ID_OR_NUMBER_C_LIKE = re.compile("[A-Za-z0-9_]+")


def extract_defines(filepath: str, data_src: str) -> None:
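    """
    Accumulate ``#define`` and enum values from ``data_src`` into ``global_defines``,
    keyed by the identifier, storing the value as a raw literal string.
    """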
    filepath_rel = os.path.relpath(filepath, BASE_DIR)
    for regex_matcher in (REGEX_DEFINE_C_LIKE, REGEX_ENUM_C_LIKE):
        for m in regex_matcher.finditer(data_src):
            value_id = m.group(1)
            value_literal = m.group(2)

            # Weak comment stripping.
            # This is (arguably) acceptable since the intent is to extract numbers,
            # if developers feel the need to write lines such as:
            # `#define VALUE_MAX /* Let's make some trouble! */ 64`
            # then they can consider if that's actually needed (sigh!)...
            # Otherwise, we could replace this with a full parser such as CLANG,
            # however that is a bit of a hassle to set up.
            if "//" in value_literal:
                value_literal = value_literal.split("//", 1)[0]
            if "/*" in value_literal:
                value_literal = value_literal.split("/*", 1)[0]

            try:
                global_defines[value_id].append((tuple(filepath_rel.split(os.sep)), value_literal))
            except KeyError:
                global_defines[value_id] = [(tuple(filepath_rel.split(os.sep)), value_literal)]

    # Returning None indicates the file is not edited.


def path_score_distance(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[int, int]:
    """
    Compare two paths, to find which paths are "closer" to each other.
    This is used as a tie breaker when defines are found in multiple headers.
    """
    count_shared = 0
    range_min = min(len(a), len(b))
    range_max = max(len(a), len(b))
    for i in range(range_min):
        if a[i] != b[i]:
            break
        count_shared += 1

    count_nested = range_max - count_shared
    # Negate shared so smaller is better.
    # Less path nesting also gets priority.
    return (-count_shared, count_nested)


def eval_define(
        value_literal: str,
        *,
        default: str,
        filepath_ref_split: tuple[str, ...],
) -> tuple[str, list[str]]:
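    """
    Evaluate ``value_literal``, recursively substituting known defines
    (ties between multiple definitions are broken by path proximity to ``filepath_ref_split``).

    Return a ``(value, failed)`` pair: the evaluated value as a string
    (or ``default`` when any lookup fails) and a list of the ID's that could not be resolved.
    """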
    failed: list[str] = []

    def re_replace_fn(match: re.Match[str]) -> str:
        value = match.group()
        if value.isdigit():
            return value

        other_values = global_defines.get(value)
        if other_values is None:
            failed.append(value)
            return value

        if len(other_values) == 1:
            other_filepath_split, other_literal = other_values[0]
        else:
            # Find the "closest" on the file system.
            # In practice favoring paths which are co-located works fairly well,
            # needed as it's not known which header's ID's a file *could* reference.
            other_literal_best = ""
            other_score_best = (0, 0)
            other_filepath_split_best: tuple[str, ...] = ("",)

            for other_filepath_split_test, other_literal_test in other_values:
                other_score_test = path_score_distance(filepath_ref_split, other_filepath_split_test)
                if (
                        # First time.
                        (not other_literal_best) or
                        # A lower score has been found (smaller is better).
                        (other_score_test < other_score_best)
                ):
                    other_literal_best = other_literal_test
                    other_score_best = other_score_test
                    other_filepath_split_best = other_filepath_split_test
                del other_score_test
            other_literal = other_literal_best
            other_filepath_split = other_filepath_split_best
            del other_literal_best, other_score_best, other_filepath_split_best

        other_literal_eval, other_failed = eval_define(
            other_literal,
            default="",
            filepath_ref_split=other_filepath_split,
        )
        if other_literal_eval:
            return other_literal_eval

        # `failed.append(value)` would also be valid, report the nested failure instead
        # as it's more likely to give insights into what went wrong.
        failed.extend(other_failed)
        return value

    # Use integer division, so e.g. `MAX/2` evaluates to an `int` as it would in C.
    value_literal = value_literal.replace("/", "//")

    # Populates `failed`.
    value_literal_eval = REGEX_ID_OR_NUMBER_C_LIKE.sub(re_replace_fn, value_literal)

    if failed:
        # One or more ID's could not be found.
        return default, failed

    # This could use exception handling, don't add it unless it's needed though.
    # pylint: disable-next=eval-used
    return str(eval(value_literal_eval)), failed


def validate_sizes(filepath: str, data_src: str) -> None:
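    """
    Report a warning for every size-comment array (``[/*ID*/ 64]``) in ``data_src``
    where ``ID`` doesn't evaluate to the array size literal.
    """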
    # Nicer for printing.
    filepath_rel = os.path.relpath(filepath, BASE_DIR)
    filepath_rel_split = tuple(filepath_rel.split(os.sep))

    for m, line, (beg, end) in line_number_utils.finditer_with_line_numbers_and_bounds(
            REGEX_SIZE_COMMENT_IN_ARRAY,
            data_src,
    ):
        del end
        value_id = m.group(1)
        value_literal = m.group(2)

        value_eval, lookups_failed = eval_define(
            value_id,
            default="",
            filepath_ref_split=filepath_rel_split,
        )

        data_line_column = "{:s}:{:d}:{:d}:".format(
            filepath_rel,
            line + 1,
            # Place the cursor after the `[`.
            (m.start(0) + 1) - beg,
        )

        if len(value_id.strip()) != len(value_id):
            print("WARN:", data_line_column, "comment includes white-space")
            continue

        if lookups_failed:
            print("WARN:", data_line_column, "[{:s}]".format(", ".join(lookups_failed)), "unknown")
            continue

        if value_literal != value_eval:
            print("WARN:", data_line_column, value_id, "mismatch", "({:s} != {:s})".format(value_literal, value_eval))
            continue

        if SHOW_SUCCESS:
            print("OK: ", data_line_column, "{:s}={:s},".format(value_id, value_literal))

    # Returning None indicates the file is not edited.


def main() -> int:
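    """
    Make two passes over the sources: extract all defines into ``global_defines``,
    then validate every size-comment against them.
    """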

    # Extract defines.
    run(
        directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
        is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
        text_operation=extract_defines,
        # Can't be used if we want to accumulate in a global variable.
        use_multiprocess=False,
    )

    # For predictable lookups on tie breakers.
    # In practice it should almost never matter.
    for values in global_defines.values():
        if len(values) > 1:
            values.sort()

    # Validate sizes.
    run(
        directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
        is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
        text_operation=validate_sizes,
        # Can't be used if we want to accumulate in a global variable.
        use_multiprocess=False,
    )

    return 0


if __name__ == "__main__":
    sys.exit(main())
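
Based on the `print` calls in `validate_sizes`, the output takes the following shape (the paths, ID's and values below are made up for illustration, not real output):

WARN: source/foo/bar.c:42:10: MAX_NAME mismatch (63 != 64)
OK:  source/foo/baz.c:7:12: MAX_NAME=64,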