gh-134891: Add PyUnstable_Unicode_GET_CACHED_HASH (GH-134892)

This commit is contained in:
Petr Viktorin 2025-06-06 15:51:00 +02:00 committed by GitHub
parent 343182853f
commit e413e26719
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 55 additions and 5 deletions

View File

@ -191,6 +191,22 @@ access to internal read-only data of Unicode objects:
.. versionadded:: 3.2
.. c:function:: Py_hash_t PyUnstable_Unicode_GET_CACHED_HASH(PyObject *str)
If the hash of *str*, as returned by :c:func:`PyObject_Hash`, has been
cached and is immediately available, return it.
Otherwise, return ``-1`` *without* setting an exception.
If *str* is not a string (that is, if ``PyUnicode_Check(str)``
is false), the behavior is undefined.
This function never fails with an exception.
Note that there are no guarantees on when an object's hash is cached,
and the (non-)existence of a cached hash does not imply that the string has
any other properties.
Unicode Character Properties
""""""""""""""""""""""""""""

View File

@ -304,6 +304,10 @@ New features
input string contains non-ASCII characters.
(Contributed by Victor Stinner in :gh:`133968`.)
* Add :c:func:`PyUnstable_Unicode_GET_CACHED_HASH` to get the cached hash of
a string. See the documentation for caveats.
(Contributed by Petr Viktorin in :gh:`131510`.)
Porting to Python 3.15
----------------------

View File

@ -300,6 +300,17 @@ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
}
/* Macro of the same name as the static inline function above: it passes the
   argument through _PyObject_CAST() before calling that function. */
#define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
/* Return the hash cached on the str object `op`, or -1 when no hash has
   been cached yet.  No exception is ever set.

   `op` must be a str.  That is only verified by an assert(), so passing
   anything else is undefined behavior once assertions are compiled out.
   When Py_GIL_DISABLED is defined the field is read with a relaxed atomic
   load; otherwise it is read directly. */
static inline Py_hash_t
PyUnstable_Unicode_GET_CACHED_HASH(PyObject *op) {
    assert(PyUnicode_Check(op));
#ifdef Py_GIL_DISABLED
    Py_hash_t cached = _Py_atomic_load_ssize_relaxed(
        &_PyASCIIObject_CAST(op)->hash);
#else
    Py_hash_t cached = _PyASCIIObject_CAST(op)->hash;
#endif
    return cached;
}
/* Write into the canonical representation, this function does not do any sanity
checks and is intended for usage in loops. The caller should cache the
kind and data pointers obtained from other function calls.

View File

@ -1739,6 +1739,20 @@ class CAPITest(unittest.TestCase):
# Check that the second call returns the same result
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_GET_CACHED_HASH(self):
    """Exercise PyUnstable_Unicode_GET_CACHED_HASH through its _testcapi wrapper."""
    from _testcapi import unicode_GET_CACHED_HASH as get_cached_hash

    raw = b'some new string'
    # Build the str at runtime so that it is neither interned by the
    # parser nor produced by constant folding.
    text = str(raw, 'ascii')

    # Implementation detail: a freshly created str has no cached hash.
    self.assertEqual(get_cached_hash(text), -1)

    # Implementation detail: using the str as a dict key caches its hash.
    {text: text}

    # Implementation detail: an ASCII str hashes like the equivalent bytes.
    expected = hash(raw)
    self.assertEqual(get_cached_hash(text), expected)
class PyUnicodeWriterTest(unittest.TestCase):
def create_writer(self, size):

View File

@ -0,0 +1,2 @@
Add :c:func:`PyUnstable_Unicode_GET_CACHED_HASH` to get the cached hash of a
string.

View File

@ -220,6 +220,12 @@ unicode_copycharacters(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", to_copy, copied);
}
/* Test wrapper for PyUnstable_Unicode_GET_CACHED_HASH().
   Passes `arg` straight to that function, with no type check of its own,
   and returns the result converted with PyLong_FromSsize_t().
   `self` is unused. */
static PyObject*
unicode_GET_CACHED_HASH(PyObject *self, PyObject *arg)
{
return PyLong_FromSsize_t(PyUnstable_Unicode_GET_CACHED_HASH(arg));
}
// --- PyUnicodeWriter type -------------------------------------------------
@ -570,6 +576,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_GET_CACHED_HASH", unicode_GET_CACHED_HASH, METH_O},
{NULL},
};

View File

@ -167,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
/* Expands to a plain access of the `hash` member of `op`, after passing `op`
   through _PyASCIIObject_CAST().  The expansion itself uses no atomic
   operation. */
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
/* Return the value stored in the `hash` field of the str object `op`.
   `op` is only validated by an assert().  The field is read through
   FT_ATOMIC_LOAD_SSIZE_RELAXED (presumably a relaxed atomic load in
   free-threaded builds -- confirm against its definition). */
static inline Py_hash_t PyUnicode_HASH(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
}
/* Internal alias: uses of PyUnicode_HASH expand to
   PyUnstable_Unicode_GET_CACHED_HASH. */
#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH
static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
{