diff options
author | Friedemann Kleint <Friedemann.Kleint@qt.io> | 2021-09-29 17:07:27 +0200 |
---|---|---|
committer | Friedemann Kleint <Friedemann.Kleint@qt.io> | 2021-09-30 11:45:02 +0200 |
commit | e19a6a9aac1a2ddc9724361a148b4e9542f4e4b6 (patch) | |
tree | fefe18ed95c0f6e0a94bb7768c8b4843cbdd166f | |
parent | 5413343de9f546e4c177a062e21e1e1ab7c76ae5 (diff) |
Limited API: Add a fast path to _PepUnicode_AsString()
Add a helper that returns the char * UTF-8 data from a
PyUnicode object without triggering conversions.
Change _PepUnicode_AsString() to return const char *
to match Python 3 on this occasion.
[ChangeLog][shiboken6] A fast path for converting
Python strings to UTF-8 has been added to the limited API.
Change-Id: I619933633e226525fc248b27fd42cd1ce5178be0
Reviewed-by: Christian Tismer <tismer@stackless.com>
-rw-r--r-- | sources/shiboken6/libshiboken/pep384impl.cpp | 25 | ||||
-rw-r--r-- | sources/shiboken6/libshiboken/pep384impl.h | 2 |
2 files changed, 24 insertions, 3 deletions
```diff
diff --git a/sources/shiboken6/libshiboken/pep384impl.cpp b/sources/shiboken6/libshiboken/pep384impl.cpp
index d2a2dd68a..d9b7bd4fb 100644
--- a/sources/shiboken6/libshiboken/pep384impl.cpp
+++ b/sources/shiboken6/libshiboken/pep384impl.cpp
@@ -359,8 +359,26 @@ void *_PepUnicode_DATA(PyObject *str)
            ? _PepUnicode_COMPACT_DATA(str) : _PepUnicode_NONCOMPACT_DATA(str);
 }
 
-char *
-_PepUnicode_AsString(PyObject *str)
+// Fast path accessing UTF8 data without doing a conversion similar
+// to _PyUnicode_AsUTF8String
+static const char *utf8FastPath(PyObject *str)
+{
+    if (PyUnicode_GetLength(str) == 0)
+        return "";
+    auto *asciiObj = reinterpret_cast<PepASCIIObject *>(str);
+    if (asciiObj->state.kind != PepUnicode_1BYTE_KIND)
+        return nullptr; // Empirical: PyCompactUnicodeObject.utf8 is only valid for 1 byte
+    if (asciiObj->state.ascii) {
+        auto *data = asciiObj + 1;
+        return reinterpret_cast<const char *>(data);
+    }
+    auto *compactObj = reinterpret_cast<PepCompactUnicodeObject *>(str);
+    if (compactObj->utf8_length)
+        return compactObj->utf8;
+    return nullptr;
+}
+
+const char *_PepUnicode_AsString(PyObject *str)
 {
     /*
      * We need to keep the string alive but cannot borrow the Python object.
@@ -373,6 +391,9 @@ _PepUnicode_AsString(PyObject *str)
 #define TOSTRING(x) STRINGIFY(x)
 #define AT __FILE__ ":" TOSTRING(__LINE__)
 
+    if (const auto *utf8 = utf8FastPath(str))
+        return utf8;
+
     static PyObject *cstring_dict = nullptr;
     if (cstring_dict == nullptr) {
         cstring_dict = PyDict_New();
diff --git a/sources/shiboken6/libshiboken/pep384impl.h b/sources/shiboken6/libshiboken/pep384impl.h
index 2c9551ea6..551038f84 100644
--- a/sources/shiboken6/libshiboken/pep384impl.h
+++ b/sources/shiboken6/libshiboken/pep384impl.h
@@ -240,7 +240,7 @@ LIBSHIBOKEN_API int Pep_GetVerboseFlag(void);
 
 #ifdef Py_LIMITED_API
 
-LIBSHIBOKEN_API char *_PepUnicode_AsString(PyObject *);
+LIBSHIBOKEN_API const char *_PepUnicode_AsString(PyObject *);
 
 enum PepUnicode_Kind {
     PepUnicode_WCHAR_KIND = 0,
```