aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFriedemann Kleint <Friedemann.Kleint@qt.io>2021-09-29 17:07:27 +0200
committerFriedemann Kleint <Friedemann.Kleint@qt.io>2021-09-30 11:45:02 +0200
commite19a6a9aac1a2ddc9724361a148b4e9542f4e4b6 (patch)
treefefe18ed95c0f6e0a94bb7768c8b4843cbdd166f
parent5413343de9f546e4c177a062e21e1e1ab7c76ae5 (diff)
Limited API: Add a fast path to _PepUnicode_AsString()
Add a helper that returns the char * UTF-8 data from a PyUnicode object without triggering conversions. While at it, change _PepUnicode_AsString() to return const char * to match Python 3.

[ChangeLog][shiboken6] A fast path for converting Python strings to UTF-8 has been added to the limited API.

Change-Id: I619933633e226525fc248b27fd42cd1ce5178be0
Reviewed-by: Christian Tismer <tismer@stackless.com>
-rw-r--r--sources/shiboken6/libshiboken/pep384impl.cpp25
-rw-r--r--sources/shiboken6/libshiboken/pep384impl.h2
2 files changed, 24 insertions, 3 deletions
diff --git a/sources/shiboken6/libshiboken/pep384impl.cpp b/sources/shiboken6/libshiboken/pep384impl.cpp
index d2a2dd68a..d9b7bd4fb 100644
--- a/sources/shiboken6/libshiboken/pep384impl.cpp
+++ b/sources/shiboken6/libshiboken/pep384impl.cpp
@@ -359,8 +359,26 @@ void *_PepUnicode_DATA(PyObject *str)
? _PepUnicode_COMPACT_DATA(str) : _PepUnicode_NONCOMPACT_DATA(str);
}
-char *
-_PepUnicode_AsString(PyObject *str)
+// Fast path accessing UTF8 data without doing a conversion, similar to
+// _PyUnicode_AsUTF8String. Returns nullptr when no UTF-8 buffer is readily
+// available and the caller has to take the converting slow path.
+static const char *utf8FastPath(PyObject *str)
+{
+    if (PyUnicode_GetLength(str) == 0)
+        return "";
+    auto *asciiObj = reinterpret_cast<PepASCIIObject *>(str);
+    // Non-compact strings keep their data out of line; the layout read below does not apply.
+    if (asciiObj->state.compact == 0 || asciiObj->state.kind != PepUnicode_1BYTE_KIND)
+        return nullptr; // Empirical: PyCompactUnicodeObject.utf8 is only valid for 1 byte
+    if (asciiObj->state.ascii) // Compact ASCII: the characters directly follow the header.
+        return reinterpret_cast<const char *>(asciiObj + 1);
+    auto *compactObj = reinterpret_cast<PepCompactUnicodeObject *>(str);
+    if (compactObj->utf8_length)
+        return compactObj->utf8;
+    return nullptr;
+}
+
+const char *_PepUnicode_AsString(PyObject *str)
{
/*
* We need to keep the string alive but cannot borrow the Python object.
@@ -373,6 +391,9 @@ _PepUnicode_AsString(PyObject *str)
#define TOSTRING(x) STRINGIFY(x)
#define AT __FILE__ ":" TOSTRING(__LINE__)
+ if (const auto *utf8 = utf8FastPath(str))
+ return utf8;
+
static PyObject *cstring_dict = nullptr;
if (cstring_dict == nullptr) {
cstring_dict = PyDict_New();
diff --git a/sources/shiboken6/libshiboken/pep384impl.h b/sources/shiboken6/libshiboken/pep384impl.h
index 2c9551ea6..551038f84 100644
--- a/sources/shiboken6/libshiboken/pep384impl.h
+++ b/sources/shiboken6/libshiboken/pep384impl.h
@@ -240,7 +240,7 @@ LIBSHIBOKEN_API int Pep_GetVerboseFlag(void);
#ifdef Py_LIMITED_API
-LIBSHIBOKEN_API char *_PepUnicode_AsString(PyObject *);
+LIBSHIBOKEN_API const char *_PepUnicode_AsString(PyObject *);
enum PepUnicode_Kind {
PepUnicode_WCHAR_KIND = 0,