diff options
author | Friedemann Kleint <Friedemann.Kleint@qt.io> | 2023-10-03 21:09:30 +0200 |
---|---|---|
committer | Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> | 2023-10-05 12:28:04 +0000 |
commit | c016423b3829354582c9449923b4833ab53b5eda (patch) | |
tree | 712f286160ef2c749a40ae2876a8679dbc498e5f | |
parent | 1e9a7c56e99e616c725e0b4eebf1484336f218d4 (diff) |
Adapt UNICODE structures to Python 3.12
Python 3.12 removed the wstr and wstr_length fields from the Unicode object
structures, requiring us to branch on the runtime Python version.
Task-number: PYSIDE-2230
Change-Id: Ifee3558af18fab992b5fc8efac944664fff20164
Reviewed-by: Christian Tismer <tismer@stackless.com>
(cherry picked from commit 6ba340c03a7c66bea0ed741ea695e234078bc039)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
(cherry picked from commit 1975949af42015ce89f85d05518e66bbe7a0f5ae)
-rw-r--r-- | sources/shiboken6/libshiboken/pep384impl.cpp | 72 |
1 file changed, 64 insertions, 8 deletions
diff --git a/sources/shiboken6/libshiboken/pep384impl.cpp b/sources/shiboken6/libshiboken/pep384impl.cpp index 7b3c205ec..fd0b9063e 100644 --- a/sources/shiboken6/libshiboken/pep384impl.cpp +++ b/sources/shiboken6/libshiboken/pep384impl.cpp @@ -251,7 +251,7 @@ _PepType_Lookup(PyTypeObject *type, PyObject *name) // structs and macros modelled after their equivalents in // cpython/Include/cpython/unicodeobject.h -struct PepASCIIObject +struct PepASCIIObject // since 3.12 { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ @@ -264,18 +264,29 @@ struct PepASCIIObject unsigned int ready:1; unsigned int :24; } state; +}; + +struct PepASCIIObject_311 : public PepASCIIObject +{ wchar_t *wstr; /* wchar_t representation (null-terminated) */ }; -struct PepCompactUnicodeObject +struct PepCompactUnicodeObject // since 3.12 { PepASCIIObject _base; Py_ssize_t utf8_length; char *utf8; /* UTF-8 representation (null-terminated) */ +}; + +struct PepCompactUnicodeObject_311 // since 3.12 +{ + PepASCIIObject_311 _base; + Py_ssize_t utf8_length; + char *utf8; /* UTF-8 representation (null-terminated) */ Py_ssize_t wstr_length; /* Number of code points in wstr */ }; -struct PepUnicodeObject +struct PepUnicodeObject // since 3.12 { PepCompactUnicodeObject _base; union { @@ -286,6 +297,17 @@ struct PepUnicodeObject } data; /* Canonical, smallest-form Unicode buffer */ }; +struct PepUnicodeObject_311 +{ + PepCompactUnicodeObject_311 _base; + union { + void *any; + Py_UCS1 *latin1; + Py_UCS2 *ucs2; + Py_UCS4 *ucs4; + } data; /* Canonical, smallest-form Unicode buffer */ +}; + int _PepUnicode_KIND(PyObject *str) { return reinterpret_cast<PepASCIIObject *>(str)->state.kind; @@ -303,18 +325,33 @@ int _PepUnicode_IS_COMPACT(PyObject *str) return asciiObj->state.compact; } -static void *_PepUnicode_COMPACT_DATA(PyObject *str) +static void *_PepUnicode_ASCII_DATA(PyObject *str) { + if (_PepRuntimeVersion() < 0x030C00) { + auto *asciiObj_311 = 
reinterpret_cast<PepASCIIObject_311 *>(str); + return asciiObj_311 + 1; + } auto *asciiObj = reinterpret_cast<PepASCIIObject *>(str); - if (asciiObj->state.ascii) - return asciiObj + 1; + return asciiObj + 1; +} + +static void *_PepUnicode_COMPACT_DATA(PyObject *str) +{ + if (_PepUnicode_IS_ASCII(str) != 0) + return _PepUnicode_ASCII_DATA(str); + if (_PepRuntimeVersion() < 0x030C00) { + auto *compactObj_311 = reinterpret_cast<PepCompactUnicodeObject_311 *>(str); + return compactObj_311 + 1; + } auto *compactObj = reinterpret_cast<PepCompactUnicodeObject *>(str); return compactObj + 1; } static void *_PepUnicode_NONCOMPACT_DATA(PyObject *str) { - return reinterpret_cast<PepUnicodeObject *>(str)->data.any; + return _PepRuntimeVersion() < 0x030C00 + ? reinterpret_cast<PepUnicodeObject_311 *>(str)->data.any + : reinterpret_cast<PepUnicodeObject *>(str)->data.any; } void *_PepUnicode_DATA(PyObject *str) @@ -325,6 +362,23 @@ void *_PepUnicode_DATA(PyObject *str) // Fast path accessing UTF8 data without doing a conversion similar // to _PyUnicode_AsUTF8String +static const char *utf8FastPath_311(PyObject *str) +{ + if (PyUnicode_GetLength(str) == 0) + return ""; + auto *asciiObj = reinterpret_cast<PepASCIIObject_311 *>(str); + if (asciiObj->state.kind != PepUnicode_1BYTE_KIND || asciiObj->state.compact == 0) + return nullptr; // Empirical: PyCompactUnicodeObject.utf8 is only valid for 1 byte + if (asciiObj->state.ascii) { + auto *data = asciiObj + 1; + return reinterpret_cast<const char *>(data); + } + auto *compactObj = reinterpret_cast<PepCompactUnicodeObject_311 *>(str); + if (compactObj->utf8_length) + return compactObj->utf8; + return nullptr; +} + static const char *utf8FastPath(PyObject *str) { if (PyUnicode_GetLength(str) == 0) @@ -356,8 +410,10 @@ const char *_PepUnicode_AsString(PyObject *str) #define TOSTRING(x) STRINGIFY(x) #define AT __FILE__ ":" TOSTRING(__LINE__) - if (const auto *utf8 = utf8FastPath(str)) + if (const auto *utf8 = _PepRuntimeVersion() < 
0x030C00 + ? utf8FastPath_311(str) : utf8FastPath(str)) { return utf8; + } static PyObject *cstring_dict = nullptr; if (cstring_dict == nullptr) { |