blender/tools/check_source/static_check_size_comments.py
Campbell Barton 0265b13399 Tools: add a utility to validate array sizes
The script check_source/static_check_size_comments.py can be run directly
or called via the convenience target "make check_size_comments".

Add a utility module: `line_number_utils` which implements
a version of `re.finditer` that includes line numbers & ranges.
2025-05-23 14:04:26 +10:00

281 lines
8.9 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
r"""
Validates sizes in C/C++ sources written as: ``type name[/*MAX_NAME*/ 64]``
where ``MAX_NAME`` is expected to be a define equal to 64, otherwise a warning is reported.
"""
__all__ = (
"main",
)
import os
import sys
import re
# Path setup: the tool lives in `tools/check_source/`, so the repository
# root is two directories up.
THIS_DIR = os.path.dirname(__file__)
# NOTE: `os.path.abspath` already returns a normalized path,
# so no additional `normpath` wrapping is needed.
BASE_DIR = os.path.abspath(os.path.join(THIS_DIR, "..", ".."))

# Make `batch_edit_text` & `line_number_utils` importable.
sys.path.append(os.path.join(THIS_DIR, "..", "utils_maintenance", "modules"))

from batch_edit_text import run
import line_number_utils
# -----------------------------------------------------------------------------
# Utilities
# -----------------------------------------------------------------------------
# Local Settings

# TODO, move to config file

# Directories to scan, relative to `BASE_DIR`.
SOURCE_DIRS = (
    "source",
)

# File extensions treated as source files (checked with `str.endswith`).
SOURCE_EXT = (
    # C/C++
    ".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",
    # Objective C
    ".m", ".mm",
    # GLSL
    ".glsl",
)

# When true, print an "OK" line for every size comment that validates.
# Mainly useful for development to check extraction & validation are working.
SHOW_SUCCESS = True
# -----------------------------------------------------------------------------
# Globals

# Map defines to a list of (filename-split, value) pairs.
# Populated by `extract_defines`, sorted in `main` for predictable tie breaking,
# then read by `eval_define` when resolving size comments.
global_defines: dict[
    # The define ID.
    str,
    # Value(s), in case it's defined in multiple files.
    list[
        tuple[
            # The `BASE_DIR` relative path (split by `os.sep`).
            tuple[str, ...],
            # The value of the define,
            # a literal string with comments stripped out.
            str,
        ],
    ],
] = {}
# A C-like identifier (no leading digit).
REGEX_ID_LITERAL = r"[A-Za-z_][A-Za-z_0-9]*"

# Detect:
# `[/*ID*/ 64]`.
# `[/*ID - 2*/ 62]`.
# NOTE: raw strings are used for all patterns, the previous double-escaped
# form also included an unnecessary `\/` escape (`/` has no special meaning in a regex).
REGEX_SIZE_COMMENT_IN_ARRAY = re.compile(r"\[/\*([^\]]+)\*/\s*(\d+)\]")

# Detect: `#define ID 64`
REGEX_DEFINE_C_LIKE = re.compile(r"^\s*#\s*define\s+(" + REGEX_ID_LITERAL + r")[ \t]+([^\n]+)", re.MULTILINE)

# Detect:
# `ID = 64,`
# `ID = 64`
REGEX_ENUM_C_LIKE = re.compile(r"^\s*(" + REGEX_ID_LITERAL + r")\s=\s([^,\n]+)", re.MULTILINE)

# Detect ID's (or plain numbers), used to substitute values when evaluating a define.
REGEX_ID_OR_NUMBER_C_LIKE = re.compile(r"[A-Za-z0-9_]+")
def extract_defines(filepath: str, data_src: str) -> None:
    """
    Scan ``data_src`` for C-like ``#define`` & enum declarations,
    accumulating their literal values into ``global_defines``.

    :arg filepath: Absolute path of the file being scanned
       (stored relative to ``BASE_DIR``).
    :arg data_src: The file's entire text contents.
    """
    # Hoisted out of the loops (loop-invariant).
    filepath_rel_split = tuple(os.path.relpath(filepath, BASE_DIR).split(os.sep))
    for regex_matcher in (REGEX_DEFINE_C_LIKE, REGEX_ENUM_C_LIKE):
        for m in regex_matcher.finditer(data_src):
            value_id = m.group(1)
            value_literal = m.group(2)
            # Weak comment stripping.
            # This is (arguably) acceptable since the intent is to extract numbers,
            # if developers feel the need to write lines such as:
            # `#define VALUE_MAX /* Lets make some trouble! */ 64`
            # Then they can consider if that's actually needed (sigh!)...
            # Otherwise, we could replace this with a full parser such as CLANG,
            # however this is a bit of a hassle to setup.
            if "//" in value_literal:
                value_literal = value_literal.split("//", 1)[0]
            if "/*" in value_literal:
                value_literal = value_literal.split("/*", 1)[0]
            # `setdefault` replaces the `try/except KeyError` append idiom.
            global_defines.setdefault(value_id, []).append((filepath_rel_split, value_literal))
    # Returning None indicates the file is not edited.
def path_score_distance(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[int, int]:
    """
    Score how "close" two split paths are to each other,
    where a smaller resulting tuple means "closer".

    This is used as a tie breaker when defines are found in multiple headers.
    """
    shared = 0
    for elem_a, elem_b in zip(a, b):
        if elem_a != elem_b:
            break
        shared += 1
    # The shared count is negated so more shared components sorts first,
    # the second element penalizes additional (non-shared) path nesting.
    return (-shared, max(len(a), len(b)) - shared)
def eval_define(
    value_literal: str,
    *,
    default: str,
    filepath_ref_split: tuple[str, ...],
) -> tuple[str, list[str]]:
    """
    Evaluate a define's literal value into a number (kept as a string).

    :arg value_literal: The right hand side of a define with comments stripped,
       e.g. ``"MAX_NAME - 2"``; referenced ID's are resolved recursively
       via ``global_defines``.
    :arg default: Returned in place of the value when any ID fails to resolve.
    :arg filepath_ref_split: The ``BASE_DIR``-relative path of the referencing file
       (split by ``os.sep``), used as a tie breaker when an ID is defined in multiple files.
    :return: ``(value, failed)`` where ``value`` is the evaluated result
       (or ``default`` on failure) and ``failed`` lists the ID's that could not be resolved.
    """
    failed: list[str] = []

    def re_replace_fn(match: re.Match[str]) -> str:
        # Substitute each matched ID with its recursively evaluated value,
        # recording unresolved ID's in `failed` (shared with the enclosing scope).
        value = match.group()
        if value.isdigit():
            return value
        other_values = global_defines.get(value)
        if other_values is None:
            failed.append(value)
            return value
        if len(other_values) == 1:
            other_filepath_split, other_literal = other_values[0]
        else:
            # Find the "closest" on the file system.
            # In practice favoring paths which are co-located works fairly well,
            # needed as it's not known which headers the ID's in a header *could* reference.
            # NOTE(review): the "first time" check assumes define literals are never
            # empty strings, an empty literal would keep re-triggering it — confirm.
            other_literal_best = ""
            other_score_best = (0, 0)
            other_filepath_split_best: tuple[str, ...] = ("",)
            for other_filepath_split_test, other_literal_test in other_values:
                other_score_test = path_score_distance(filepath_ref_split, other_filepath_split_test)
                if (
                        # First time.
                        (not other_literal_best) or
                        # A lower score has been found (smaller is better).
                        (other_score_test < other_score_best)
                ):
                    other_literal_best = other_literal_test
                    other_score_best = other_score_test
                    other_filepath_split_best = other_filepath_split_test
                del other_score_test
            other_literal = other_literal_best
            other_filepath_split = other_filepath_split_best
            del other_literal_best, other_score_best, other_filepath_split_best
        # Recurse so that defines referencing other defines still resolve,
        # an empty `default` makes failure detectable below.
        other_literal_eval, other_failed = eval_define(
            other_literal,
            default="",
            filepath_ref_split=other_filepath_split,
        )
        if other_literal_eval:
            return other_literal_eval
        # `failed.append(value)` is also valid, reporting the nested failure is preferred
        # as it's more likely to give insights into what went wrong.
        failed.extend(other_failed)
        return value

    # Use integer division (matching C semantics for integral types).
    value_literal = value_literal.replace(r"/", r"//")
    # Populates `failed`.
    value_literal_eval = REGEX_ID_OR_NUMBER_C_LIKE.sub(re_replace_fn, value_literal)
    if failed:
        # One or more ID could not be found.
        return default, failed
    # This could use exception handling, don't unless it's needed though.
    # pylint: disable-next=eval-used
    return str(eval(value_literal_eval)), failed
def validate_sizes(filepath: str, data_src: str) -> None:
    """
    Print a warning for every ``[/*DEFINE*/ number]`` size comment in ``data_src``
    whose number doesn't match the evaluated value of ``DEFINE``.
    """
    # Nicer for printing.
    filepath_rel = os.path.relpath(filepath, BASE_DIR)
    filepath_rel_split = tuple(filepath_rel.split(os.sep))
    for match, line_index, (span_beg, _span_end) in line_number_utils.finditer_with_line_numbers_and_bounds(
            REGEX_SIZE_COMMENT_IN_ARRAY,
            data_src,
    ):
        value_id, value_literal = match.group(1), match.group(2)
        value_eval, lookups_failed = eval_define(
            value_id,
            default="",
            filepath_ref_split=filepath_rel_split,
        )
        # Place the cursor after the `[`.
        data_line_column = "{:s}:{:d}:{:d}:".format(
            filepath_rel,
            line_index + 1,
            (match.start(0) + 1) - span_beg,
        )
        if value_id != value_id.strip():
            print("WARN:", data_line_column, "comment includes white-space")
        elif lookups_failed:
            print("WARN:", data_line_column, "[{:s}]".format(", ".join(lookups_failed)), "unknown")
        elif value_literal != value_eval:
            print("WARN:", data_line_column, value_id, "mismatch", "({:s} != {:s})".format(value_literal, value_eval))
        elif SHOW_SUCCESS:
            print("OK: ", data_line_column, "{:s}={:s},".format(value_id, value_literal))
    # Returning None indicates the file is not edited.
def main() -> int:
    """
    Collect defines from all sources, then validate size comments against them.

    :return: Zero (warnings are printed, not raised).
    """
    source_paths = [os.path.join(BASE_DIR, d) for d in SOURCE_DIRS]

    def is_source(filepath: str) -> bool:
        # Predicate shared by both passes.
        return filepath.endswith(SOURCE_EXT)

    # Pass 1: extract defines.
    run(
        directories=source_paths,
        is_text=is_source,
        text_operation=extract_defines,
        # Can't be used if we want to accumulate in a global variable.
        use_multiprocess=False,
    )

    # For predictable lookups on tie breakers.
    # In practice it should almost never matter.
    for values in global_defines.values():
        if len(values) > 1:
            values.sort()

    # Pass 2: validate sizes.
    run(
        directories=source_paths,
        is_text=is_source,
        text_operation=validate_sizes,
        # Can't be used if we want to accumulate in a global variable.
        use_multiprocess=False,
    )
    return 0
# Command line entry point (`main` always returns zero, warnings are informational).
if __name__ == "__main__":
    sys.exit(main())