Skip to content

Commit

Permalink
gh-128013: Convert unicodeobject.c macros to functions (#128061)
Browse files Browse the repository at this point in the history
Convert unicodeobject.c macros to static inline functions.

* Add _PyUnicode_SET_UTF8() and _PyUnicode_SET_UTF8_LENGTH() macros.
* Add PyUnicode_HASH() and PyUnicode_SET_HASH() macros.
* Remove unused _PyUnicode_KIND() and _PyUnicode_GET_LENGTH() macros.
  • Loading branch information
vstinner authored Dec 18, 2024
1 parent 91c5508 commit f802c8b
Showing 1 changed file with 78 additions and 45 deletions.
123 changes: 78 additions & 45 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,47 +112,80 @@ NOTE: In the interpreter's initialization phase, some globals are currently
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#endif

#define _PyUnicode_UTF8(op) \
(_PyCompactUnicodeObject_CAST(op)->utf8)
#define PyUnicode_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
_PyUnicode_UTF8(op))
#define _PyUnicode_UTF8_LENGTH(op) \
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
#define PyUnicode_UTF8_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
_PyASCIIObject_CAST(op)->length : \
_PyUnicode_UTF8_LENGTH(op))
static inline char* _PyUnicode_UTF8(PyObject *op)
{
return (_PyCompactUnicodeObject_CAST(op)->utf8);
}

static inline char* PyUnicode_UTF8(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
if (PyUnicode_IS_COMPACT_ASCII(op)) {
return ((char*)(_PyASCIIObject_CAST(op) + 1));
}
else {
return _PyUnicode_UTF8(op);
}
}

static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
{
_PyCompactUnicodeObject_CAST(op)->utf8 = utf8;
}

static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
if (PyUnicode_IS_COMPACT_ASCII(op)) {
return _PyASCIIObject_CAST(op)->length;
}
else {
return _PyCompactUnicodeObject_CAST(op)->utf8_length;
}
}

static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
{
_PyCompactUnicodeObject_CAST(op)->utf8_length = length;
}

#define _PyUnicode_LENGTH(op) \
(_PyASCIIObject_CAST(op)->length)
#define _PyUnicode_STATE(op) \
(_PyASCIIObject_CAST(op)->state)
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
#define _PyUnicode_KIND(op) \
(assert(_PyUnicode_CHECK(op)), \
_PyASCIIObject_CAST(op)->state.kind)
#define _PyUnicode_GET_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
_PyASCIIObject_CAST(op)->length)

static inline Py_hash_t PyUnicode_HASH(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
}

static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
{
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash);
}

#define _PyUnicode_DATA_ANY(op) \
(_PyUnicodeObject_CAST(op)->data.any)

#define _PyUnicode_SHARE_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
(_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
static inline int _PyUnicode_SHARE_UTF8(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
assert(!PyUnicode_IS_COMPACT_ASCII(op));
return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op));
}

/* true if the Unicode object has an allocated UTF-8 memory block
(not shared with other data) */
#define _PyUnicode_HAS_UTF8_MEMORY(op) \
((!PyUnicode_IS_COMPACT_ASCII(op) \
&& _PyUnicode_UTF8(op) \
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
{
return (!PyUnicode_IS_COMPACT_ASCII(op)
&& _PyUnicode_UTF8(op) != NULL
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op));
}


/* Generic helper macro to convert characters of different types.
from_type and to_type have to be valid type names, begin and end
Expand Down Expand Up @@ -1123,8 +1156,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)

if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyMem_Free(_PyUnicode_UTF8(unicode));
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
PyUnicode_SET_UTF8(unicode, NULL);
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
}
#ifdef Py_TRACE_REFS
_Py_ForgetReference(unicode);
Expand Down Expand Up @@ -1177,8 +1210,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
{
PyMem_Free(_PyUnicode_UTF8(unicode));
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
PyUnicode_SET_UTF8(unicode, NULL);
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
}

data = (PyObject *)PyObject_Realloc(data, new_size);
Expand All @@ -1188,8 +1221,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
}
_PyUnicode_DATA_ANY(unicode) = data;
if (share_utf8) {
_PyUnicode_UTF8(unicode) = data;
_PyUnicode_UTF8_LENGTH(unicode) = length;
PyUnicode_SET_UTF8(unicode, data);
PyUnicode_SET_UTF8_LENGTH(unicode, length);
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
Expand Down Expand Up @@ -1769,7 +1802,7 @@ unicode_modifiable(PyObject *unicode)
assert(_PyUnicode_CHECK(unicode));
if (Py_REFCNT(unicode) != 1)
return 0;
if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1)
if (PyUnicode_HASH(unicode) != -1)
return 0;
if (PyUnicode_CHECK_INTERNED(unicode))
return 0;
Expand Down Expand Up @@ -5862,8 +5895,8 @@ unicode_fill_utf8(PyObject *unicode)
PyErr_NoMemory();
return -1;
}
_PyUnicode_UTF8(unicode) = cache;
_PyUnicode_UTF8_LENGTH(unicode) = len;
PyUnicode_SET_UTF8(unicode, cache);
PyUnicode_SET_UTF8_LENGTH(unicode, len);
memcpy(cache, start, len);
cache[len] = '\0';
_PyBytesWriter_Dealloc(&writer);
Expand Down Expand Up @@ -11434,9 +11467,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
return 0;
}

Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni));
Py_hash_t right_hash = PyUnicode_HASH(right_uni);
assert(right_hash != -1);
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left));
Py_hash_t hash = PyUnicode_HASH(left);
if (hash != -1 && hash != right_hash) {
return 0;
}
Expand Down Expand Up @@ -11916,14 +11949,14 @@ unicode_hash(PyObject *self)
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self));
Py_hash_t hash = PyUnicode_HASH(self);
if (hash != -1) {
return hash;
}
x = Py_HashBuffer(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));

FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
PyUnicode_SET_HASH(self, x);
return x;
}

Expand Down Expand Up @@ -15427,8 +15460,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
_PyUnicode_STATE(self).statically_allocated = 0;
_PyUnicode_UTF8_LENGTH(self) = 0;
_PyUnicode_UTF8(self) = NULL;
PyUnicode_SET_UTF8_LENGTH(self, 0);
PyUnicode_SET_UTF8(self, NULL);
_PyUnicode_DATA_ANY(self) = NULL;

share_utf8 = 0;
Expand Down Expand Up @@ -15458,8 +15491,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)

_PyUnicode_DATA_ANY(self) = data;
if (share_utf8) {
_PyUnicode_UTF8_LENGTH(self) = length;
_PyUnicode_UTF8(self) = data;
PyUnicode_SET_UTF8_LENGTH(self, length);
PyUnicode_SET_UTF8(self, data);
}

memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));
Expand Down

0 comments on commit f802c8b

Please sign in to comment.