#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

r"""
Validates array sizes in C/C++ sources written as: ``type name[/*MAX_NAME*/ 64]``

The commented expression (``MAX_NAME`` in this example) is expected to be a define
which evaluates to the literal that follows it (64), otherwise a warning is reported.
"""

__all__ = (
    "main",
)

import os
import sys
import re

THIS_DIR = os.path.dirname(__file__)
# Two directories up from this script, `abspath` also normalizes the result.
BASE_DIR = os.path.abspath(os.path.join(THIS_DIR, "..", ".."))

# Needed so the maintenance utility modules imported below can be found.
sys.path.append(os.path.join(THIS_DIR, "..", "utils_maintenance", "modules"))

from batch_edit_text import run
import line_number_utils


# -----------------------------------------------------------------------------
# Utilities


# -----------------------------------------------------------------------------
# Local Settings

# TODO, move to config file
# Directories (relative to `BASE_DIR`) to scan.
SOURCE_DIRS = (
    "source",
)

# File extensions to scan.
SOURCE_EXT = (
    # C/C++
    ".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",
    # Objective C
    ".m", ".mm",
    # GLSL
    ".glsl",
)

# Mainly useful for development to check extraction & validation are working.
SHOW_SUCCESS = True


# -----------------------------------------------------------------------------
# Globals

# Map defines to a list of (filename-split, value) pairs:
#
# - Key: the define ID.
# - Value: a list, in case the ID is defined in multiple files, where each item is a pair of:
#   - The `BASE_DIR` relative path (split by `os.sep`).
#   - The value of the define, a literal string with comments stripped out.
global_defines: dict[str, list[tuple[tuple[str, ...], str]]] = {}

REGEX_ID_LITERAL = "[A-Za-z_][A-Za-z_0-9]*"

# Detect:
# `[/*ID*/ 64]`.
# `[/*ID - 2*/ 62]`.
REGEX_SIZE_COMMENT_IN_ARRAY = re.compile("\\[\\/\\*([^\\]]+)\\*\\/\\s*(\\d+)\\]")

# Detect: `#define ID 64`
REGEX_DEFINE_C_LIKE = re.compile("^\\s*#\\s*define\\s+(" + REGEX_ID_LITERAL + ")[ \t]+([^\n]+)", re.MULTILINE)

# Detect:
# `ID = 64,`
# `ID = 64`
REGEX_ENUM_C_LIKE = re.compile("^\\s*(" + REGEX_ID_LITERAL + ")\\s=\\s([^,\n]+)", re.MULTILINE)

# Detect ID's.
# Matches identifiers as well as plain digit runs, `eval_define` tells the two apart.
REGEX_ID_OR_NUMBER_C_LIKE = re.compile("[A-Za-z0-9_]+")


def extract_defines(filepath: str, data_src: str) -> None:
    """
    Record the values matched by ``REGEX_DEFINE_C_LIKE`` & ``REGEX_ENUM_C_LIKE``
    in ``data_src`` into ``global_defines``, keyed by ID.

    Each value is stored along with the ``BASE_DIR`` relative path of ``filepath`` (split by ``os.sep``).
    Values which are blank once comments are stripped are skipped since they can't be evaluated,
    references to them are then reported as unknown instead of failing on evaluation.
    """
    # The path is the same for every match, only split it once.
    filepath_rel_split = tuple(os.path.relpath(filepath, BASE_DIR).split(os.sep))

    for regex_matcher in (REGEX_DEFINE_C_LIKE, REGEX_ENUM_C_LIKE):
        for m in regex_matcher.finditer(data_src):
            value_id = m.group(1)
            value_literal = m.group(2)

            # Weak comment stripping.
            # This is (arguably) acceptable since the intent is to extract numbers,
            # if developers feel the need to write lines such as:
            # `#define VALUE_MAX /* Lets make some trouble! */ 64`
            # Then they can consider if that's actually needed (sigh!)...
            # Otherwise, we could replace this with a full parser such as CLANG,
            # however this is a bit of a hassle to setup.
            value_literal = value_literal.split("//", 1)[0]
            value_literal = value_literal.split("/*", 1)[0]

            # Nothing remains to evaluate (the example above ends up here),
            # storing it would make any reference fail on `eval("")`.
            if not value_literal.strip():
                continue

            global_defines.setdefault(value_id, []).append((filepath_rel_split, value_literal))

    # Returning None indicates the file is not edited.


def path_score_distance(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[int, int]:
    """
    Compare two paths, to find which paths are "closer" to each-other.
    This is used as a tie breaker when defines are found in multiple headers.

    Return a ``(-shared, nested)`` pair where ``shared`` is the number of leading components
    both paths have in common & ``nested`` is the number of components of the longer path
    which are not shared. Smaller results compare as closer.
    """
    count_shared = 0
    for a_part, b_part in zip(a, b):
        if a_part != b_part:
            break
        count_shared += 1

    count_nested = max(len(a), len(b)) - count_shared

    # Negate shared so smaller is better.
    # Less path nesting also gets priority.
    return (-count_shared, count_nested)


def _closest_define(
        candidates: list[tuple[tuple[str, ...], str]],
        filepath_ref_split: tuple[str, ...],
) -> tuple[tuple[str, ...], str]:
    """
    Return the ``(filepath-split, literal)`` pair from ``candidates`` closest to ``filepath_ref_split``.
    """
    # Find the "closest" on the file system.
    # In practice favoring paths which are co-located works fairly well,
    # needed as it's not known which headers a file *could* reference.
    #
    # `min` returns the first item on ties, so the order of `candidates` is the final tie breaker.
    return min(candidates, key=lambda item: path_score_distance(filepath_ref_split, item[0]))


def eval_define(
        value_literal: str,
        *,
        default: str,
        filepath_ref_split: tuple[str, ...],
) -> tuple[str, list[str]]:
    """
    Evaluate the expression ``value_literal``, replacing ID's by the values in ``global_defines``.

    When an ID is defined in multiple files, the definition closest to ``filepath_ref_split`` is used.
    Referenced definitions are evaluated recursively (cyclic definitions are not detected).

    Return a pair:

    - The result as a string, or ``default`` when one or more ID's could not be resolved.
    - The ID's which could not be resolved (empty on success).
    """
    failed: list[str] = []

    def re_replace_fn(match: re.Match[str]) -> str:
        value = match.group()
        if value.isdigit():
            return value

        other_values = global_defines.get(value)
        if other_values is None:
            failed.append(value)
            return value

        other_filepath_split, other_literal = _closest_define(other_values, filepath_ref_split)

        # Resolve relative to the file which contains the definition.
        other_literal_eval, other_failed = eval_define(
            other_literal,
            default="",
            filepath_ref_split=other_filepath_split,
        )
        if other_literal_eval:
            return other_literal_eval

        # `failed.append(value)` is also valid, report the nested failure as it's more likely to give insights
        # into what went wrong.
        failed.extend(other_failed)
        return value

    # Use integer division.
    value_literal = value_literal.replace(r"/", r"//")

    # Populates `failed`.
    value_literal_eval = REGEX_ID_OR_NUMBER_C_LIKE.sub(re_replace_fn, value_literal)

    if failed:
        # One or more ID could not be found.
        return default, failed

    # This could use exception handling, don't unless it's needed though.
    # NOTE: the expression originates from the scanned sources, only run this on trusted source trees.
    # pylint: disable-next=eval-used
    return str(eval(value_literal_eval)), failed


def validate_sizes(filepath: str, data_src: str) -> None:
    """
    Check every ``[/*EXPR*/ N]`` found in ``data_src``, printing a warning when
    ``EXPR`` is padded by white-space, references unknown ID's or doesn't evaluate to ``N``.

    Matches which validate are printed too when ``SHOW_SUCCESS`` is enabled.
    """
    # Nicer for printing.
    filepath_rel = os.path.relpath(filepath, BASE_DIR)
    filepath_rel_split = tuple(filepath_rel.split(os.sep))

    for m, line, (beg, _end) in line_number_utils.finditer_with_line_numbers_and_bounds(
            REGEX_SIZE_COMMENT_IN_ARRAY,
            data_src,
    ):
        value_id = m.group(1)
        value_literal = m.group(2)

        data_line_column = "{:s}:{:d}:{:d}:".format(
            filepath_rel,
            line + 1,
            # Place the cursor after the `[`.
            (m.start(0) + 1) - beg,
        )

        # Check before evaluating: the result isn't used when this warns,
        # and a white-space only comment can't be evaluated at all.
        if value_id != value_id.strip():
            print("WARN:", data_line_column, "comment includes white-space")
            continue

        value_eval, lookups_failed = eval_define(
            value_id,
            default="",
            filepath_ref_split=filepath_rel_split,
        )

        if lookups_failed:
            print("WARN:", data_line_column, "[{:s}]".format(", ".join(lookups_failed)), "unknown")
            continue

        if value_literal != value_eval:
            print("WARN:", data_line_column, value_id, "mismatch", "({:s} != {:s})".format(value_literal, value_eval))
            continue

        if SHOW_SUCCESS:
            print("OK:  ", data_line_column, "{:s}={:s},".format(value_id, value_literal))

    # Returning None indicates the file is not edited.


def main() -> int:
    """
    Extract defines from all files in ``SOURCE_DIRS``, then validate the size comments of those files.

    Return the exit code (always zero, problems are only printed).
    """

    def run_on_sources(text_operation) -> None:
        # Both passes visit the same files, only the operation differs.
        run(
            directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
            is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
            text_operation=text_operation,
            # Can't be used if we want to accumulate in a global variable.
            use_multiprocess=False,
        )

    # Extract defines.
    run_on_sources(extract_defines)

    # For predictable lookups on tie breakers.
    # In practice it should almost never matter.
    for values in global_defines.values():
        if len(values) > 1:
            values.sort()

    # Validate sizes.
    run_on_sources(validate_sizes)

    return 0


if __name__ == "__main__":
    sys.exit(main())