gh-127545: Add _Py_ALIGNED_DEF(N, T) and use it for PyObject (GH-135209)

* Replace _Py_ALIGN_AS(V) by _Py_ALIGNED_DEF(N, T)

This is now a common façade for the various `_Alignas` alternatives,
which behave in interesting ways -- see the source comment.

The new macro (and MSVC's `__declspec(align)`) should not be used
on a variable/member declaration that includes a struct declaraton.
A workaround is to separate the struct definition.
Do that for `PyASCIIObject.state`.

* Specify minimum PyGC_Head and PyObject alignment

As documented in InternalDocs/garbage_collector.md, the garbage collector
stores flags in the least significant two bits of the _gc_prev pointer
in struct PyGC_Head. Consequently, this pointer is only capable of storing
a location that's aligned to a 4-byte boundary.

Encode this requirement using _Py_ALIGNED_DEF.

This patch fixes a segfault in m68k, which was previously investigated
by Adrian Glaubitz here:
https://lists.debian.org/debian-68k/2024/11/msg00020.html
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1087600
Original patch (using the GCC-only Py_ALIGNED) by Finn Thain.

Co-authored-by: Finn Thain <fthain@linux-m68k.org>
Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
This commit is contained in:
Petr Viktorin 2025-06-11 12:44:58 +02:00 committed by GitHub
parent 2b8b4774d2
commit 49d72365cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 122 additions and 106 deletions

View File

@ -59,14 +59,6 @@
# include <intrin.h> // __readgsqword()
#endif
// Suppress known warnings in Python header files.
#if defined(_MSC_VER)
// Warning that alignas behaviour has changed. Doesn't affect us, because we
// never relied on the old behaviour.
#pragma warning(push)
#pragma warning(disable: 5274)
#endif
// Include Python header files
#include "pyport.h"
#include "pymacro.h"
@ -146,9 +138,4 @@
#include "cpython/pyfpe.h"
#include "cpython/tracemalloc.h"
// Restore warning filter
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#endif /* !Py_PYTHON_H */

View File

@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
/* --- Unicode Type ------------------------------------------------------- */
struct _PyUnicodeObject_state {
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
#ifdef Py_GIL_DISABLED
// Needs to be accessed atomically, so can't be a bit field.
unsigned char interned;
#else
unsigned int interned:2;
#endif
/* Character size:
- PyUnicode_1BYTE_KIND (1):
* character type = Py_UCS1 (8 bits, unsigned)
* all characters are in the range U+0000-U+00FF (latin1)
* if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF
- PyUnicode_2BYTE_KIND (2):
* character type = Py_UCS2 (16 bits, unsigned)
* all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF
- PyUnicode_4BYTE_KIND (4):
* character type = Py_UCS4 (32 bits, unsigned)
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
unsigned int kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
unsigned int compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
unsigned int ascii:1;
/* The object is statically allocated. */
unsigned int statically_allocated:1;
#ifndef Py_GIL_DISABLED
/* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue gh-63736 on m68k) */
unsigned int :24;
#endif
};
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
structure. state.ascii and state.compact are set, and the data
immediately follow the structure. utf8_length can be found
@ -99,67 +156,8 @@ typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
#ifdef Py_GIL_DISABLED
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
In the non-free-threaded build, we'll use explicit padding instead */
_Py_ALIGN_AS(4)
#endif
struct {
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
#ifdef Py_GIL_DISABLED
// Needs to be accessed atomically, so can't be a bit field.
unsigned char interned;
#else
unsigned int interned:2;
#endif
/* Character size:
- PyUnicode_1BYTE_KIND (1):
* character type = Py_UCS1 (8 bits, unsigned)
* all characters are in the range U+0000-U+00FF (latin1)
* if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF
- PyUnicode_2BYTE_KIND (2):
* character type = Py_UCS2 (16 bits, unsigned)
* all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF
- PyUnicode_4BYTE_KIND (4):
* character type = Py_UCS4 (32 bits, unsigned)
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
unsigned int kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
unsigned int compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
unsigned int ascii:1;
/* The object is statically allocated. */
unsigned int statically_allocated:1;
#ifndef Py_GIL_DISABLED
/* Padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue gh-63736 on m68k) */
unsigned int :24;
#endif
} state;
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
_Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
} PyASCIIObject;
/* Non-ASCII strings allocated through PyUnicode_New use the

View File

@ -159,10 +159,11 @@ struct atexit_state {
typedef struct {
// Tagged pointer to next object in the list.
// 0 means the object is not tracked
uintptr_t _gc_next;
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
// Tagged pointer to previous object in the list.
// Lowest two bits are used for flags documented later.
// Those bits are made available by the struct's minimum alignment.
uintptr_t _gc_prev;
} PyGC_Head;

View File

@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
#define PyObject_VAR_HEAD PyVarObject ob_base;
#define Py_INVALID_SIZE (Py_ssize_t)-1
/* PyObjects are given a minimum alignment so that the least significant bits
* of an object pointer become available for other purposes.
* This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
*/
#define _PyObject_MIN_ALIGNMENT 4
/* Nothing is actually declared to be a PyObject, but every pointer to
* a Python object can be cast to a PyObject*. This is inheritance built
* by hand. Similarly every pointer to a variable-size Python object can,
@ -136,6 +142,7 @@ struct _object {
#else
Py_ssize_t ob_refcnt;
#endif
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
};
#ifdef _MSC_VER
__pragma(warning(pop))
@ -153,7 +160,7 @@ struct _object {
// ob_tid stores the thread id (or zero). It is also used by the GC and the
// trashcan mechanism as a linked list pointer and by the GC to store the
// computed "gc_refs" refcount.
uintptr_t ob_tid;
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
uint16_t ob_flags;
PyMutex ob_mutex; // per-object lock
uint8_t ob_gc_bits; // gc-related state

View File

@ -24,44 +24,66 @@
#endif
// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
// We currently use alignas for free-threaded builds only; additional compat
// checking would be great before we add it to the default build.
// Standards/compiler support:
// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
//
// `N`: the desired minimum alignment, an integer literal, number of bytes
// `T`: the type of the defined variable
// (or a type with at least the defined variable's alignment)
//
// May not be used on a struct definition.
//
// Standards/compiler support for `alignas` alternatives:
// - `alignas` is a keyword in C23 and C++11.
// - `_Alignas` is a keyword in C11
// - GCC & clang has __attribute__((aligned))
// (use that for older standards in pedantic mode)
// - MSVC has __declspec(align)
// - `_Alignas` is common C compiler extension
// Older compilers may name it differently; to allow compilation on such
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
// Older compilers may name `alignas` differently; to allow compilation on such
// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
// defined. Note that defining it wrong (including defining it to nothing) will
// cause ABI incompatibilities.
#ifdef Py_GIL_DISABLED
# ifndef _Py_ALIGN_AS
# ifdef __cplusplus
# if __cplusplus >= 201103L
# define _Py_ALIGN_AS(V) alignas(V)
# elif defined(__GNUC__) || defined(__clang__)
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
# elif defined(_MSC_VER)
# define _Py_ALIGN_AS(V) __declspec(align(V))
# else
# define _Py_ALIGN_AS(V) alignas(V)
# endif
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define _Py_ALIGN_AS(V) alignas(V)
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
# define _Py_ALIGN_AS(V) _Alignas(V)
# elif (defined(__GNUC__) || defined(__clang__))
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
# elif defined(_MSC_VER)
# define _Py_ALIGN_AS(V) __declspec(align(V))
# else
# define _Py_ALIGN_AS(V) _Alignas(V)
# endif
# endif
//
// Behavior of `alignas` alternatives:
// - `alignas` & `_Alignas`:
// - Can be used multiple times; the greatest alignment applies.
// - It is an *error* if the combined effect of all `alignas` modifiers would
// decrease the alignment.
// - Takes types or numbers.
// - May not be used on a struct definition, unless also defining a variable.
// - `__declspec(align)`:
// - Has no effect if it would decrease alignment.
// - Only takes an integer literal.
// - May be used on struct or variable definitions.
// However, when defining both the struct and the variable at once,
// `declspec(aligned)` causes compiler warning 5274 and possible ABI
// incompatibility.
// - ` __attribute__((aligned))`:
// - Has no effect if it would decrease alignment.
// - Takes types or numbers
// - May be used on struct or variable definitions.
#ifndef _Py_ALIGNED_DEF
# ifdef __cplusplus
# if __cplusplus >= 201103L
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# elif defined(__GNUC__) || defined(__clang__)
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
# elif defined(_MSC_VER)
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
# else
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# endif
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
# elif (defined(__GNUC__) || defined(__clang__))
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
# elif defined(_MSC_VER)
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
# else
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
# endif
#endif
/* Minimum value between x and y */

View File

@ -0,0 +1 @@
Fix crash when building on Linux/m68k.