Hans Goudey 91803e130f Cleanup: Grammar: Fix uses of "for e.g."
e.g. stands for "exempli gratia" in Latin which means "for example".
The best way to make sure it makes sense when writing is to just expand
it to "for example". In these cases where the text was "for e.g.", that
leaves us with "for for example" which makes no sense. This commit fixes
all 110 cases, mostly just replacing the words with "for example",
but also restructuring the text a bit more in a few cases, mostly by
moving "e.g." to the beginning of a list in parentheses.

Pull Request: https://projects.blender.org/blender/blender/pulls/139596
2025-05-29 21:21:18 +02:00

687 lines
24 KiB
Python

# SPDX-FileCopyrightText: 2024 Blender Foundation
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Ref: https://peps.python.org/pep-0491/
# Deferred but seems to include valid info for existing wheels.
"""
This module takes wheels and applies them to a "managed" destination directory.
"""
__all__ = (
"apply_action",
)
import contextlib
import os
import re
import shutil
import sys
import zipfile
from collections.abc import (
Callable,
Iterator,
)
# A single source of wheels: a handle paired with the wheel file-paths it provides.
WheelSource = tuple[
    # Key - doesn't matter what this is... it's just a handle.
    str,
    # A list of absolute wheel file-paths.
    list[str],
]
def _read_records_csv(filepath: str) -> list[list[str]]:
import csv
with open(filepath, encoding="utf8", errors="surrogateescape") as fh:
return list(csv.reader(fh.read().splitlines()))
def _wheels_from_dir(dirpath: str) -> tuple[
# The key is:
# wheel_id
# The values are:
# Top level directories.
dict[str, list[str]],
# Unknown paths.
list[str],
]:
result: dict[str, list[str]] = {}
paths_unused: set[str] = set()
if not os.path.exists(dirpath):
return result, list(paths_unused)
for entry in os.scandir(dirpath):
name = entry.name
paths_unused.add(name)
if not entry.is_dir():
continue
# TODO: is this part of the spec?
name = entry.name
if not name.endswith("-info"):
continue
filepath_record = os.path.join(entry.path, "RECORD")
if not os.path.exists(filepath_record):
continue
record_rows = _read_records_csv(filepath_record)
# Build top-level paths.
toplevel_paths_set: set[str] = set()
for row in record_rows:
if not row:
continue
path_text = row[0]
# Ensure paths separator is compatible.
path_text = path_text.replace("\\", "/")
# Ensure double slashes don't cause issues or "/./" doesn't complicate checking the head of the path.
path_split = [
elem for elem in path_text.split("/")
if elem not in {"", "."}
]
if not path_split:
continue
# These wont have been extracted.
if path_split[0] in {"..", name}:
continue
toplevel_paths_set.add(path_split[0])
# Some wheels contain `{name}.libs` which are *not* listed in `RECORD`.
# Always add the path, the value will be skipped if it's missing.
toplevel_paths_set.add(os.path.join(dirpath, name.partition("-")[0] + ".libs"))
result[name] = list(sorted(toplevel_paths_set))
del toplevel_paths_set
for wheel_name, toplevel_paths in result.items():
paths_unused.discard(wheel_name)
for name in toplevel_paths:
paths_unused.discard(name)
paths_unused_list = list(sorted(paths_unused))
return result, paths_unused_list
def _wheel_info_dir_from_zip(filepath_wheel: str) -> tuple[str, list[str]] | None:
"""
Return:
- The "*-info" directory name which contains meta-data.
- The top-level path list (excluding "..").
"""
dir_info = ""
toplevel_paths: set[str] = set()
with zipfile.ZipFile(filepath_wheel, mode="r") as zip_fh:
# This file will always exist.
for filepath_rel in zip_fh.namelist():
path_split = [
elem for elem in filepath_rel.split("/")
if elem not in {"", "."}
]
if not path_split:
continue
if path_split[0] == "..":
continue
if len(path_split) == 2:
if path_split[1].upper() == "RECORD":
if path_split[0].endswith("-info"):
dir_info = path_split[0]
toplevel_paths.add(path_split[0])
if dir_info == "":
return None
toplevel_paths.discard(dir_info)
toplevel_paths_list = list(sorted(toplevel_paths))
return dir_info, toplevel_paths_list
def _rmtree_safe(dir_remove: str, expected_root: str) -> Exception | None:
if not dir_remove.startswith(expected_root):
raise Exception("Expected prefix not found")
ex_result = None
if sys.version_info < (3, 12):
def on_error(*args) -> None: # type: ignore
nonlocal ex_result
print("Failed to remove:", args)
ex_result = args[2][0]
shutil.rmtree(dir_remove, onerror=on_error)
else:
def on_exc(*args) -> None: # type: ignore
nonlocal ex_result
print("Failed to remove:", args)
ex_result = args[2]
shutil.rmtree(dir_remove, onexc=on_exc)
return ex_result
def _remove_safe(file_remove: str) -> Exception | None:
ex_result = None
try:
os.remove(file_remove)
except Exception as ex:
ex_result = ex
return ex_result
# -----------------------------------------------------------------------------
# Support for Wheel: Binary distribution format
def _wheel_parse_key_value(data: bytes) -> dict[bytes, bytes]:
# Parse: `{module}.dist-info/WHEEL` format, parse it inline as
# this doesn't seem to use an existing specification, it's simply key/value pairs.
result = {}
for line in data.split(b"\n"):
key, sep, value = line.partition(b":")
if not sep:
continue
if not key:
continue
result[key.strip()] = value.strip()
return result
def _wheel_record_csv_remap(record_data: str, record_path_map: dict[str, str]) -> bytes:
import csv
from io import StringIO
lines_remap = []
for line in csv.reader(StringIO(record_data, newline="")):
# It's expected to be 3, in this case we only care about the first element (the path),
# however, if there are fewer items, this may be malformed or some unknown future format.
# - Only handle lines containing 3 elements.
# - Only manipulate the first element.
if len(line) < 3:
continue
# Items 1 and 2 are hash_sum & size respectively.
# If the files need to be modified these will need to be updated.
path = line[0]
if (path_remap := record_path_map.get(path)) is not None:
print(path_remap)
line = [path_remap, *line[0]]
lines_remap.append(line)
data = StringIO()
writer = csv.writer(data, delimiter=",", quotechar='"', lineterminator="\n")
writer.writerows(lines_remap)
return data.getvalue().encode("utf8")
def _wheel_zipfile_normalize(
        zip_fh: zipfile.ZipFile,
        error_fn: Callable[[Exception], None],
) -> dict[str, bytes] | None:
    """
    Modify the ZIP file to account for Python's binary format.

    When the wheel declares ``Root-Is-Purelib: false``, members under
    ``{module}.data/{purelib,platlib}/`` are renamed to their top-level locations
    by mutating ``member.filename`` in-place (later extraction from ``zip_fh``
    picks up the new names), and a remapped ``RECORD`` is produced.

    :arg zip_fh: The open wheel archive (its ``ZipInfo`` entries may be mutated).
    :arg error_fn: Called with any exception raised while reading archive members.
    :return: A map of ``{RECORD-archive-path: remapped-RECORD-bytes}`` the caller
        should write instead of the archived data, or ``None`` when no remapping
        applies (pure-lib wheels, missing meta-data) or on read failure.
    """
    # Locate the `WHEEL` & `RECORD` meta-data members (both directly under `*.dist-info/`).
    member_dict = {}
    files_to_find = (".dist-info/WHEEL", ".dist-info/RECORD")
    for member in zip_fh.infolist():
        filename_orig = member.filename
        if (
                filename_orig.endswith(files_to_find) and
                # Unlikely but possible the names also exist in nested directories.
                (filename_orig.count("/") == 1)
        ):
            member_dict[os.path.basename(filename_orig)] = member
            if len(member_dict) == len(files_to_find):
                break

    if (
            ((member_wheel := member_dict.get("WHEEL")) is None) or
            ((member_record := member_dict.get("RECORD")) is None)
    ):
        return None

    try:
        wheel_data = zip_fh.read(member_wheel.filename)
    except Exception as ex:
        error_fn(ex)
        return None

    wheel_key_values = _wheel_parse_key_value(wheel_data)
    # A missing key defaults to a pure-lib wheel, which needs no changes.
    if wheel_key_values.get(b"Root-Is-Purelib", b"true").lower() != b"false":
        return None
    del wheel_key_values

    # The setting has been found: `Root-Is-Purelib: false`.
    # This requires the wheel to be mangled.
    #
    # - `{module-XXX}.dist-info/*` will have a:
    #   `{module-XXX}.data/purelib/`
    # - For a full list see:
    #   https://docs.python.org/3/library/sysconfig.html#installation-paths
    #
    # Note that PIP's `wheel` package has a `wheel/wheelfile.py` file which is a useful reference.

    assert member_wheel.filename.endswith("/WHEEL")
    dirpath_dist_info = member_wheel.filename.removesuffix("/WHEEL")
    assert dirpath_dist_info.endswith(".dist-info")
    # `{module-XXX}.dist-info` -> `{module-XXX}.data`.
    dirpath_data = dirpath_dist_info.removesuffix("dist-info") + "data"
    dirpath_data_with_slash = dirpath_data + "/"

    # https://docs.python.org/3/library/sysconfig.html#user-scheme
    user_scheme_map = {}
    data_map = {}
    record_path_map = {}

    # Simply strip the prefix in the case of `purelib` & `platlib`
    # so the modules are found in the expected directory.
    #
    # Note that we could support a "bin" and other directories however
    # for the purpose of Blender scripts, installing command line programs
    # for Blender's add-ons to access via `bin` is quite niche (although not impossible).
    #
    # For the time being this is *not* full support Python's "User scheme"
    # just enough to import modules.
    #
    # Omitting other directories such as "includes" & "scripts" means these will remain in the
    # `{module-XXX}.data/includes` sub-directory, support for them can always be added if needed.
    user_scheme_map["purelib"] = ""
    user_scheme_map["platlib"] = ""

    for member in zip_fh.infolist():
        filepath_orig = member.filename
        if not filepath_orig.startswith(dirpath_data_with_slash):
            continue

        # Split `{module}.data/{scheme}/{tail}` into (`{scheme}`, `{tail}`).
        path_base, path_tail = filepath_orig[len(dirpath_data_with_slash):].partition("/")[0::2]
        # The path may not contain a tail, skip these cases.
        if not path_tail:
            continue

        if (path_base_remap := user_scheme_map.get(path_base)) is None:
            continue

        if path_base_remap:
            filepath_remap = "{:s}/{:s}".format(path_base_remap, path_tail)
        else:
            filepath_remap = path_tail

        # NOTE: mutates the `ZipInfo` in-place, later extraction relies on the new name.
        member.filename = filepath_remap
        record_path_map[filepath_orig] = filepath_remap

    try:
        data_map[member_record.filename] = _wheel_record_csv_remap(
            zip_fh.read(member_record.filename).decode("utf8"),
            record_path_map,
        )
    except Exception as ex:
        error_fn(ex)
        return None

    # Nothing to remap.
    if not record_path_map:
        return None

    return data_map
# -----------------------------------------------------------------------------
# Generic ZIP File Extractions
def _zipfile_extractall_safe(
        zip_fh: zipfile.ZipFile,
        path: str,
        path_restrict: str,
        *,
        error_fn: Callable[[Exception], None],
        remove_error_fn: Callable[[str, Exception], None],
        # Map zip-file data to bytes.
        # Only for small files as the mapped data needs to be held in memory.
        # As it happens for this use case, it's only needed for the CSV file listing.
        data_map: dict[str, bytes] | None,
) -> None:
    """
    A version of ``ZipFile.extractall`` that won't write to paths outside ``path_restrict``.

    Avoids writing this:
       ``zip_fh.extractall(zip_fh, path)``

    On any extraction failure, every file extracted so far is removed again so a
    failure never leaves a partially-extracted (and potentially broken) state.

    :arg path: Destination directory, must be located under ``path_restrict``.
    :arg path_restrict: The directory extraction may not escape.
    :arg error_fn: Called with any exception raised while extracting.
    :arg remove_error_fn: Called with ``(path, exception)`` when removing a file fails.
    :arg data_map: Optional ``{archive-path: bytes}`` overrides written instead of the archived data.
    """
    sep = os.sep
    path_restrict = path_restrict.rstrip(sep)
    if sep == "\\":
        # Windows accepts both separators, strip both.
        path_restrict = path_restrict.rstrip("/")
    path_restrict_with_slash = path_restrict + sep

    # The prefix of `path` relative to `path_restrict`, prepended to every member name.
    # Strip is probably not needed (only if multiple slashes exist).
    path_prefix = path[len(path_restrict_with_slash):].lstrip(sep)
    # Switch slashes forward.
    if sep == "\\":
        path_prefix = path_prefix.replace("\\", "/").rstrip("/") + "/"
    else:
        path_prefix = path_prefix + "/"

    path_restrict_with_slash = path_restrict + sep
    assert len(path) >= len(path_restrict_with_slash)
    if not path.startswith(path_restrict_with_slash):
        # This is an internal error if it ever happens.
        raise Exception("Expected the restricted directory to start with \"{:s}\"".format(path_restrict_with_slash))

    has_error = False
    member_index = 0

    # Use an iterator to avoid duplicating the checks (for the cleanup pass).
    def zip_iter_filtered(*, verbose: bool) -> Iterator[tuple[zipfile.ZipInfo, str, str]]:
        # Yields (member, original-archive-name, prefixed-destination-name),
        # skipping members whose normalized path escapes `path_restrict`.
        for member in zip_fh.infolist():
            filename_orig = member.filename
            filename_next = path_prefix + filename_orig

            # This isn't likely to happen so accept a noisy print here.
            # If this ends up happening more often, it could be suppressed.
            # (although this hints at bigger problems because we might be excluding necessary files).
            if os.path.normpath(filename_next).startswith(".." + sep):
                if verbose:
                    print("Skipping path:", filename_next, "that escapes:", path_restrict)
                continue

            yield member, filename_orig, filename_next

    for member, filename_orig, filename_next in zip_iter_filtered(verbose=True):
        # Increment before extracting, so a potential cleanup will include the file that failed to extract.
        member_index += 1

        # Temporarily rename the member so `extract` writes it to the prefixed location.
        member.filename = filename_next
        data_transform = None if data_map is None else data_map.get(filename_orig)
        filepath_native = path_restrict + sep + filename_next.replace("/", sep)

        # Extraction can fail for many reasons, see: #132924.
        try:
            if data_transform is not None:
                # Write the override bytes instead of the archived data.
                with open(filepath_native, "wb") as fh:
                    fh.write(data_transform)
            else:
                zip_fh.extract(member, path_restrict)
        except Exception as ex:
            error_fn(ex)
            print("Failed to extract path:", filepath_native, "error", str(ex))
            remove_error_fn(filepath_native, ex)
            has_error = True
        # Restore the archive name (the `ZipInfo` is shared with other passes).
        member.filename = filename_orig
        if has_error:
            break

    # If the zip-file failed to extract, remove all files that were extracted.
    # This is done so failure to extract a file never results in a partially-working
    # state which can cause confusing situations for users.
    if has_error:
        # NOTE: this currently leaves empty directories which is not ideal.
        # It's possible to calculate directories created by this extraction but more involved.
        member_cleanup_len = member_index + 1
        member_index = 0
        for member, filename_orig, filename_next in zip_iter_filtered(verbose=False):
            member_index += 1
            if member_index >= member_cleanup_len:
                break

            filepath_native = path_restrict + sep + filename_next.replace("/", sep)
            try:
                os.unlink(filepath_native)
            except Exception as ex:
                remove_error_fn(filepath_native, ex)
# -----------------------------------------------------------------------------
# Wheel Utilities
# Matches `major[.minor[.patch]]` at the start of a version string.
# NOTE: every group (including the last) must be optional so versions such as
# "1", "1.2" & "1.2.3" all match without back-tracking digits into the wrong slot.
WHEEL_VERSION_RE = re.compile(r"(\d+)?(?:\.(\d+))?(?:\.(\d+))?")


def wheel_version_from_filename_for_cmp(
        filename: str,
) -> tuple[int, int, int, str]:
    """
    Extract the version number for comparison.

    Note that this only handles the first 3 numbers,
    the trailing text is compared as a string which is not technically correct
    however this is not a priority to support since scripts should only be including stable releases,
    so comparing the first 3 numbers is sufficient. The trailing string is just a tie breaker in the
    unlikely event it differs.

    If supporting the full spec, comparing "1.1.dev6" with "1.1.6rc6" for example could be handled,
    however this doesn't seem especially important as extensions should use major releases.

    :arg filename: A wheel filename, e.g. ``pip-24.0-py3-none-any.whl``.
    :return: ``(major, minor, patch, trailing_text)``, zeros & an empty string when unparsable.
    """
    filename_split = filename.split("-")
    if len(filename_split) >= 2:
        # The second `-` delimited segment of a wheel filename is the version.
        version = filename_split[1]
        if (version_match := WHEEL_VERSION_RE.match(version)) is not None:
            groups = version_match.groups()
            return (
                int(groups[0]) if groups[0] is not None else 0,
                int(groups[1]) if groups[1] is not None else 0,
                int(groups[2]) if groups[2] is not None else 0,
                # Any trailing text (e.g. `rc1`) acts as a tie breaker.
                version[version_match.end():],
            )
    return (0, 0, 0, "")
def wheel_list_deduplicate_as_skip_set(
        wheel_list: list[WheelSource],
) -> set[str]:
    """
    Return all wheel paths to skip.
    """
    wheels_to_skip: set[str] = set()
    all_wheels: set[str] = {
        filepath
        for _, wheels in wheel_list
        for filepath in wheels
    }

    # NOTE: this is not optimized.
    # Probably speed is never an issue here, but this could be sped up.

    # Keep a map from the base name to the "best" wheel,
    # the other wheels get added to `wheels_to_skip` to be ignored.
    best_by_base: dict[str, str] = {}
    for wheel_path in all_wheels:
        filename = os.path.basename(wheel_path)
        base = filename.partition("-")[0]

        best_path = best_by_base.get(base)
        if best_path is None:
            # First wheel seen with this base name.
            best_by_base[base] = wheel_path
            continue

        best_filename = os.path.basename(best_path)
        if best_filename == filename:
            # Should never happen because they are converted into a set before looping.
            assert best_path != wheel_path
            # The same wheel is used in two different locations, use a tie breaker for predictability
            # although the result should be the same.
            keep_new = best_filename < filename
        else:
            version_new = wheel_version_from_filename_for_cmp(filename)
            version_best = wheel_version_from_filename_for_cmp(best_filename)
            # Prefer the newer version, tie breaking on the filename for predictability.
            keep_new = (
                (version_best < version_new) or
                ((version_best == version_new) and (best_filename < filename))
            )

        if keep_new:
            wheels_to_skip.add(best_path)
            best_by_base[base] = wheel_path
        else:
            wheels_to_skip.add(wheel_path)

    return wheels_to_skip
# -----------------------------------------------------------------------------
# Public Function to Apply Wheels
def apply_action(
        *,
        local_dir: str,
        local_dir_site_packages: str,
        wheel_list: list[WheelSource],
        error_fn: Callable[[Exception], None],
        remove_error_fn: Callable[[str, Exception], None],
        debug: bool,
) -> None:
    """
    Apply wheels to the managed destination directory:
    remove installed wheels no longer referenced, then extract any newly referenced wheels.

    :arg local_dir:
       The location wheels are stored.
       Typically: ``~/.config/blender/4.2/extensions/.local``.

       WARNING: files under this directory may be removed.
    :arg local_dir_site_packages:
       The path which wheels are extracted into.
       Typically: ``~/.config/blender/4.2/extensions/.local/lib/python3.11/site-packages``.
    :arg wheel_list: The wheels to install, ``(key, [wheel-file-paths])`` pairs.
    :arg error_fn: Called with any exception raised while reading or extracting wheels.
    :arg remove_error_fn: Called with ``(path, exception)`` when removing a path fails.
    :arg debug: When true, print verbose information on wheels added & removed.
    """
    # NOTE: we could avoid scanning the wheel directories however:
    # Recursively removing all paths on the users system can be considered relatively risky
    # even if this is located in a known location under the users home directory - better avoid.
    # So build a list of wheel paths and only remove the unused paths from this list.
    wheels_installed, _paths_unknown = _wheels_from_dir(local_dir_site_packages)

    # Wheels and their top level directories (which would be installed).
    wheels_packages: dict[str, list[str]] = {}

    # Map the wheel ID to path.
    wheels_dir_info_to_filepath_map: dict[str, str] = {}

    # NOTE(@ideasman42): the wheels skip-set only de-duplicates at the level of the base-name of the wheels filename.
    # So the wheel file-paths:
    # - `pip-24.0-py3-none-any.whl`
    # - `pip-22.1-py2-none-any.whl`
    # Will both extract the *base* name `pip`, de-duplicating by skipping the wheels with an older version number.
    # This is not fool-proof, because it is possible files inside the `.whl` conflict upon extraction.
    # In practice I consider this fairly unlikely because:
    # - Practically all wheels extract to their top-level module names.
    # - Modules are mainly downloaded from the Python package index.
    #
    # Having two modules conflict is possible but this is an issue outside of Blender,
    # as it's most likely quite rare and generally avoided with unique module names,
    # this is not considered a problem to "solve" at the moment.
    #
    # The one exception to this assumption is any extensions that bundle `.whl` files that aren't
    # available on the Python package index. In this case naming collisions are more likely.
    # This probably needs to be handled on a policy level - if the `.whl` author also maintains
    # the extension they can in all likelihood make the module a sub-module of the extension
    # without the need to use `.whl` files.
    wheels_to_skip = wheel_list_deduplicate_as_skip_set(wheel_list)

    # Collect the packages each (non-skipped) wheel would install.
    for _key, wheels in wheel_list:
        for wheel in wheels:
            if wheel in wheels_to_skip:
                continue
            if (wheel_info := _wheel_info_dir_from_zip(wheel)) is None:
                continue
            dir_info, toplevel_paths_list = wheel_info
            wheels_packages[dir_info] = toplevel_paths_list
            wheels_dir_info_to_filepath_map[dir_info] = wheel

    # Now there are two sets of packages: the ones we need and the ones we have.

    # -----
    # Clear

    # First remove installed packages no longer needed:
    for dir_info, toplevel_paths_list in wheels_installed.items():
        if dir_info in wheels_packages:
            continue

        # Remove installed packages which aren't needed any longer.
        for filepath_rel in (dir_info, *toplevel_paths_list):
            filepath_abs = os.path.join(local_dir_site_packages, filepath_rel)
            if not os.path.exists(filepath_abs):
                continue

            if debug:
                print("removing wheel:", filepath_rel)

            ex: Exception | None = None
            if os.path.isdir(filepath_abs):
                ex = _rmtree_safe(filepath_abs, local_dir)
                # For symbolic-links, use remove as a fallback.
                if ex is not None:
                    if _remove_safe(filepath_abs) is None:
                        ex = None
            else:
                ex = _remove_safe(filepath_abs)
            if ex:
                if debug:
                    print("failed to remove:", filepath_rel, str(ex), "setting stale")
                # If the directory (or file) can't be removed, make it stale and try to remove it later.
                remove_error_fn(filepath_abs, ex)

    # -----
    # Setup

    # Install packages that need to be installed:
    for dir_info, toplevel_paths_list in wheels_packages.items():
        if dir_info in wheels_installed:
            continue

        if debug:
            for filepath_rel in toplevel_paths_list:
                print("adding wheel:", filepath_rel)
        filepath = wheels_dir_info_to_filepath_map[dir_info]

        # `ZipFile.extractall` is needed because some wheels contain paths that point to parent directories.
        # Handle this *safely* by allowing extracting to parent directories but limit this to the `local_dir`.
        try:
            # pylint: disable-next=consider-using-with
            zip_fh_context = zipfile.ZipFile(filepath, mode="r")
        except Exception as ex:
            print("Error ({:s}) opening zip-file: {:s}".format(str(ex), filepath))
            error_fn(ex)
            continue

        with contextlib.closing(zip_fh_context) as zip_fh:
            # Support non `Root-is-purelib` wheels, where the data needs to be remapped.
            # Typically `data_map` will be `None`, see: #132843 for the use case that requires this functionality.
            #
            # NOTE: these wheels should be included in tests (generated and checked to properly install).
            # Unfortunately there doesn't seem to a be practical way to generate them using the `wheel` module.
            data_map = _wheel_zipfile_normalize(
                zip_fh,
                error_fn=error_fn,
            )

            _zipfile_extractall_safe(
                zip_fh,
                local_dir_site_packages,
                local_dir,
                error_fn=error_fn,
                remove_error_fn=remove_error_fn,
                data_map=data_map,
            )