From c016423b3829354582c9449923b4833ab53b5eda Mon Sep 17 00:00:00 2001 From: Friedemann Kleint Date: Tue, 3 Oct 2023 21:09:30 +0200 Subject: Adapt UNICODE structures to Python 3.12 Some fields were removed from the structures, requiring us to branch on version. Task-number: PYSIDE-2230 Change-Id: Ifee3558af18fab992b5fc8efac944664fff20164 Reviewed-by: Christian Tismer (cherry picked from commit 6ba340c03a7c66bea0ed741ea695e234078bc039) Reviewed-by: Qt Cherry-pick Bot (cherry picked from commit 1975949af42015ce89f85d05518e66bbe7a0f5ae) --- sources/shiboken6/libshiboken/pep384impl.cpp | 72 ++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/sources/shiboken6/libshiboken/pep384impl.cpp b/sources/shiboken6/libshiboken/pep384impl.cpp index 7b3c205ec..fd0b9063e 100644 --- a/sources/shiboken6/libshiboken/pep384impl.cpp +++ b/sources/shiboken6/libshiboken/pep384impl.cpp @@ -251,7 +251,7 @@ _PepType_Lookup(PyTypeObject *type, PyObject *name) // structs and macros modelled after their equivalents in // cpython/Include/cpython/unicodeobject.h -struct PepASCIIObject +struct PepASCIIObject // since 3.12 { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ @@ -264,18 +264,29 @@ struct PepASCIIObject unsigned int ready:1; unsigned int :24; } state; +}; + +struct PepASCIIObject_311 : public PepASCIIObject +{ wchar_t *wstr; /* wchar_t representation (null-terminated) */ }; -struct PepCompactUnicodeObject +struct PepCompactUnicodeObject // since 3.12 { PepASCIIObject _base; Py_ssize_t utf8_length; char *utf8; /* UTF-8 representation (null-terminated) */ +}; + +struct PepCompactUnicodeObject_311 // up to 3.11 +{ + PepASCIIObject_311 _base; + Py_ssize_t utf8_length; + char *utf8; /* UTF-8 representation (null-terminated) */ Py_ssize_t wstr_length; /* Number of code points in wstr */ }; -struct PepUnicodeObject +struct PepUnicodeObject // since 3.12 { PepCompactUnicodeObject _base; union { @@ -286,6 +297,17 @@ struct
PepUnicodeObject } data; /* Canonical, smallest-form Unicode buffer */ }; +struct PepUnicodeObject_311 +{ + PepCompactUnicodeObject_311 _base; + union { + void *any; + Py_UCS1 *latin1; + Py_UCS2 *ucs2; + Py_UCS4 *ucs4; + } data; /* Canonical, smallest-form Unicode buffer */ +}; + int _PepUnicode_KIND(PyObject *str) { return reinterpret_cast<PepASCIIObject *>(str)->state.kind; @@ -303,18 +325,33 @@ int _PepUnicode_IS_COMPACT(PyObject *str) return asciiObj->state.compact; } -static void *_PepUnicode_COMPACT_DATA(PyObject *str) +static void *_PepUnicode_ASCII_DATA(PyObject *str) { + if (_PepRuntimeVersion() < 0x030C00) { + auto *asciiObj_311 = reinterpret_cast<PepASCIIObject_311 *>(str); + return asciiObj_311 + 1; + } auto *asciiObj = reinterpret_cast<PepASCIIObject *>(str); - if (asciiObj->state.ascii) - return asciiObj + 1; + return asciiObj + 1; +} + +static void *_PepUnicode_COMPACT_DATA(PyObject *str) +{ + if (_PepUnicode_IS_ASCII(str) != 0) + return _PepUnicode_ASCII_DATA(str); + if (_PepRuntimeVersion() < 0x030C00) { + auto *compactObj_311 = reinterpret_cast<PepCompactUnicodeObject_311 *>(str); + return compactObj_311 + 1; + } auto *compactObj = reinterpret_cast<PepCompactUnicodeObject *>(str); return compactObj + 1; } static void *_PepUnicode_NONCOMPACT_DATA(PyObject *str) { - return reinterpret_cast<PepUnicodeObject *>(str)->data.any; + return _PepRuntimeVersion() < 0x030C00 + ?
reinterpret_cast<PepUnicodeObject_311 *>(str)->data.any + : reinterpret_cast<PepUnicodeObject *>(str)->data.any; } void *_PepUnicode_DATA(PyObject *str) @@ -325,6 +362,23 @@ void *_PepUnicode_DATA(PyObject *str) // Fast path accessing UTF8 data without doing a conversion similar // to _PyUnicode_AsUTF8String +static const char *utf8FastPath_311(PyObject *str) +{ + if (PyUnicode_GetLength(str) == 0) + return ""; + auto *asciiObj = reinterpret_cast<PepASCIIObject_311 *>(str); + if (asciiObj->state.kind != PepUnicode_1BYTE_KIND || asciiObj->state.compact == 0) + return nullptr; // Empirical: PyCompactUnicodeObject.utf8 is only valid for 1 byte + if (asciiObj->state.ascii) { + auto *data = asciiObj + 1; + return reinterpret_cast<const char *>(data); + } + auto *compactObj = reinterpret_cast<PepCompactUnicodeObject_311 *>(str); + if (compactObj->utf8_length) + return compactObj->utf8; + return nullptr; +} + static const char *utf8FastPath(PyObject *str) { if (PyUnicode_GetLength(str) == 0) @@ -356,8 +410,10 @@ const char *_PepUnicode_AsString(PyObject *str) #define TOSTRING(x) STRINGIFY(x) #define AT __FILE__ ":" TOSTRING(__LINE__) - if (const auto *utf8 = utf8FastPath(str)) + if (const auto *utf8 = _PepRuntimeVersion() < 0x030C00 + ? utf8FastPath_311(str) : utf8FastPath(str)) { return utf8; + } static PyObject *cstring_dict = nullptr; if (cstring_dict == nullptr) { -- cgit v1.2.3