path: root/src/corelib/tools/qstring.cpp
diff options
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
1 files changed, 298 insertions, 103 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 3b18d31547..33326f141b 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -223,8 +223,15 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size)
// we're going to read str[offset..offset+15] (16 bytes)
for ( ; str + offset + 15 < e; offset += 16) {
- const __m128i nullMask = _mm_set1_epi32(0);
const __m128i chunk = _mm_loadu_si128((__m128i*)(str + offset)); // load
+#ifdef __AVX2__
+ // zero extend to a YMM register
+ const __m256i extended = _mm256_cvtepu8_epi16(chunk);
+ // store
+ _mm256_storeu_si256((__m256i*)(dst + offset), extended);
+ const __m128i nullMask = _mm_set1_epi32(0);
// unpack the first 8 bytes, padding with zeros
const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
@@ -233,6 +240,7 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size)
// unpack the last 8 bytes, padding with zeros
const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
_mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
size = size % 16;
@@ -540,8 +548,20 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
// and c[offset..offset+15] (16 bytes)
for ( ; uc + offset + 15 < e; offset += 16) {
// similar to fromLatin1_helper:
- // load Latin 1 data and expand to UTF-16
+ // load 16 bytes of Latin 1 data
__m128i chunk = _mm_loadu_si128((__m128i*)(c + offset));
+# ifdef __AVX2__
+ // expand Latin 1 data via zero extension
+ __m256i ldata = _mm256_cvtepu8_epi16(chunk);
+ // load UTF-16 data and compare
+ __m256i ucdata = _mm256_loadu_si256((__m256i*)(uc + offset));
+ __m256i result = _mm256_cmpeq_epi16(ldata, ucdata);
+ uint mask = ~_mm256_movemask_epi8(result);
+# else
+ // expand via unpacking
__m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask);
__m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
@@ -552,6 +572,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
__m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2);
uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16);
+# endif
if (mask) {
// found a different character
uint idx = uint(_bit_scan_forward(mask));
@@ -1306,7 +1327,7 @@ const QString::Null QString::null = { };
Returns a copy of the \a str string. The given string is converted
to Unicode using the fromUtf8() function.
- \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
+ \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
/*! \fn QString QString::fromStdWString(const std::wstring &str)
@@ -1323,8 +1344,8 @@ const QString::Null QString::null = { };
\since 4.2
Returns a copy of the \a string, where the encoding of \a string depends on
- the size of wchar. If wchar is 4 bytes, the \a string is interpreted as ucs-4,
- if wchar is 2 bytes it is interpreted as ucs-2.
+ the size of wchar. If wchar is 4 bytes, the \a string is interpreted as UCS-4,
+ if wchar is 2 bytes it is interpreted as UTF-16.
If \a size is -1 (default), the \a string has to be 0 terminated.
@@ -1359,8 +1380,8 @@ int QString::toUcs4_helper(const ushort *uc, int length, uint *out)
\since 4.2
Fills the \a array with the data contained in this QString object.
- The array is encoded in utf16 on platforms where
- wchar_t is 2 bytes wide (e.g. windows) and in ucs4 on platforms
+ The array is encoded in UTF-16 on platforms where
+ wchar_t is 2 bytes wide (e.g. Windows) and in UCS-4 on platforms
where wchar_t is 4 bytes wide (most Unix systems).
\a array has to be allocated by the caller and contain enough space to
@@ -4087,21 +4108,22 @@ QString QString::right(int n) const
QString QString::mid(int position, int n) const
- if (position > d->size)
+ using namespace QtPrivate;
+ switch (QContainerImplHelper::mid(d->size, &position, &n)) {
+ case QContainerImplHelper::Null:
return QString();
- if (position < 0) {
- if (n < 0 || n + position >= d->size)
- return *this;
- if (n + position <= 0)
- return QString();
- n += position;
- position = 0;
- } else if (uint(n) > uint(d->size - position))
- n = d->size - position;
- if (position == 0 && n == d->size)
+ case QContainerImplHelper::Empty:
+ {
+ QStringDataPtr empty = { Data::allocate(0) };
+ return QString(empty);
+ }
+ case QContainerImplHelper::Full:
return *this;
- return QString((const QChar*) d->data() + position, n);
+ case QContainerImplHelper::Subset:
+ return QString((const QChar*)d->data() + position, n);
+ }
+ return QString();
@@ -6599,6 +6621,27 @@ QString QString::number(double n, char f, int prec)
return s;
+namespace {
+template<class ResultList, class StringSource, typename MidMethod, typename Separtor>
+static ResultList splitString(const StringSource &source, MidMethod mid, const Separtor &sep,
+ QString::SplitBehavior behavior, Qt::CaseSensitivity cs, const int separatorSize)
+ ResultList list;
+ int start = 0;
+ int end;
+ int extra = 0;
+ while ((end = source.indexOf(sep, start + extra, cs)) != -1) {
+ if (start != end || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, end - start));
+ start = end + separatorSize;
+ extra = (separatorSize == 0 ? 1 : 0);
+ }
+ if (start != source.size() || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, -1));
+ return list;
+} // namespace
Splits the string into substrings wherever \a sep occurs, and
returns the list of those strings. If \a sep does not match
@@ -6619,40 +6662,102 @@ QString QString::number(double n, char f, int prec)
QStringList QString::split(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
- QStringList list;
- int start = 0;
- int extra = 0;
- int end;
- while ((end = indexOf(sep, start + extra, cs)) != -1) {
- if (start != end || behavior == KeepEmptyParts)
- list.append(mid(start, end - start));
- start = end + sep.size();
- extra = (sep.size() == 0 ? 1 : 0);
- }
- if (start != size() || behavior == KeepEmptyParts)
- list.append(mid(start));
- return list;
+ return splitString<QStringList>(*this, &QString::mid, sep, behavior, cs, sep.size());
+ Splits the string into substring references wherever \a sep occurs, and
+ returns the list of those strings. If \a sep does not match
+ anywhere in the string, splitRef() returns a single-element vector
+ containing this string reference.
+ \a cs specifies whether \a sep should be matched case
+ sensitively or case insensitively.
+ If \a behavior is QString::SkipEmptyParts, empty entries don't
+ appear in the result. By default, empty entries are kept.
+ \note All references are valid as long as this string is alive. Destroying this
+ string will cause all references to be dangling pointers.
+ \since 5.4
+ \sa QStringRef split()
+QVector<QStringRef> QString::splitRef(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
+ return splitString<QVector<QStringRef> >(*this, &QString::midRef, sep, behavior, cs, sep.size());
QStringList QString::split(QChar sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
- QStringList list;
+ return splitString<QStringList>(*this, &QString::mid, sep, behavior, cs, 1);
+ \overload
+ \since 5.4
+QVector<QStringRef> QString::splitRef(QChar sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
+ return splitString<QVector<QStringRef> >(*this, &QString::midRef, sep, behavior, cs, 1);
+ Splits the string into substring references wherever \a sep occurs, and
+ returns the list of those strings. If \a sep does not match
+ anywhere in the string, split() returns a single-element vector
+ containing this string reference.
+ \a cs specifies whether \a sep should be matched case
+ sensitively or case insensitively.
+ If \a behavior is QString::SkipEmptyParts, empty entries don't
+ appear in the result. By default, empty entries are kept.
+ \note All references are valid as long as this string is alive. Destroying this
+ string will cause all references to be dangling pointers.
+ \since 5.4
+QVector<QStringRef> QStringRef::split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const
+ return splitString<QVector<QStringRef> >(*this, &QStringRef::mid, sep, behavior, cs, sep.size());
+ \overload
+ \since 5.4
+QVector<QStringRef> QStringRef::split(QChar sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const
+ return splitString<QVector<QStringRef> >(*this, &QStringRef::mid, sep, behavior, cs, 1);
+#ifndef QT_NO_REGEXP
+namespace {
+template<class ResultList, typename MidMethod>
+static ResultList splitString(const QString &source, MidMethod mid, const QRegExp &rx, QString::SplitBehavior behavior)
+ QRegExp rx2(rx);
+ ResultList list;
int start = 0;
+ int extra = 0;
int end;
- while ((end = indexOf(sep, start, cs)) != -1) {
- if (start != end || behavior == KeepEmptyParts)
- list.append(mid(start, end - start));
- start = end + 1;
+ while ((end = rx2.indexIn(source, start + extra)) != -1) {
+ int matchedLen = rx2.matchedLength();
+ if (start != end || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, end - start));
+ start = end + matchedLen;
+ extra = (matchedLen == 0) ? 1 : 0;
- if (start != size() || behavior == KeepEmptyParts)
- list.append(mid(start));
+ if (start != source.size() || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, -1));
return list;
+} // namespace
-#ifndef QT_NO_REGEXP
@@ -6681,26 +6786,60 @@ QStringList QString::split(QChar sep, SplitBehavior behavior, Qt::CaseSensitivit
QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const
- QRegExp rx2(rx);
- QStringList list;
- int start = 0;
- int extra = 0;
- int end;
- while ((end = rx2.indexIn(*this, start + extra)) != -1) {
- int matchedLen = rx2.matchedLength();
- if (start != end || behavior == KeepEmptyParts)
- list.append(mid(start, end - start));
- start = end + matchedLen;
- extra = (matchedLen == 0) ? 1 : 0;
- }
- if (start != size() || behavior == KeepEmptyParts)
- list.append(mid(start));
- return list;
+ return splitString<QStringList>(*this, &QString::mid, rx, behavior);
+ \overload
+ \since 5.4
+ Splits the string into substring references wherever the regular expression
+ \a rx matches, and returns the list of those strings. If \a rx
+ does not match anywhere in the string, splitRef() returns a
+ single-element vector containing this string reference.
+ \note All references are valid as long as this string is alive. Destroying this
+ string will cause all references to be dangling pointers.
+ \sa QStringRef split()
+QVector<QStringRef> QString::splitRef(const QRegExp &rx, SplitBehavior behavior) const
+ return splitString<QVector<QStringRef> >(*this, &QString::midRef, rx, behavior);
+namespace {
+template<class ResultList, typename MidMethod>
+static ResultList splitString(const QString &source, MidMethod mid, const QRegularExpression &re,
+ QString::SplitBehavior behavior)
+ ResultList list;
+ if (!re.isValid()) {
+ qWarning("QString::split: invalid QRegularExpression object");
+ return list;
+ }
+ int start = 0;
+ int end = 0;
+ QRegularExpressionMatchIterator iterator = re.globalMatch(source);
+ while (iterator.hasNext()) {
+ QRegularExpressionMatch match = iterator.next();
+ end = match.capturedStart();
+ if (start != end || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, end - start));
+ start = match.capturedEnd();
+ }
+ if (start != source.size() || behavior == QString::KeepEmptyParts)
+ list.append((source.*mid)(start, -1));
+ return list;
+} // namespace
\since 5.0
@@ -6730,27 +6869,26 @@ QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const
QStringList QString::split(const QRegularExpression &re, SplitBehavior behavior) const
- QStringList list;
- if (!re.isValid()) {
- qWarning("QString::split: invalid QRegularExpression object");
- return list;
- }
+ return splitString<QStringList>(*this, &QString::mid, re, behavior);
- int start = 0;
- int end = 0;
- QRegularExpressionMatchIterator iterator = re.globalMatch(*this);
- while (iterator.hasNext()) {
- QRegularExpressionMatch match = iterator.next();
- end = match.capturedStart();
- if (start != end || behavior == KeepEmptyParts)
- list.append(mid(start, end - start));
- start = match.capturedEnd();
- }
+ \overload
+ \since 5.4
- if (start != size() || behavior == KeepEmptyParts)
- list.append(mid(start));
+ Splits the string into substring references wherever the regular expression
+ \a re matches, and returns the list of those strings. If \a re
+ does not match anywhere in the string, splitRef() returns a
+ single-element vector containing this string reference.
- return list;
+ \note All references are valid as long as this string is alive. Destroying this
+ string will cause all references to be dangling pointers.
+ \sa split() QStringRef
+QVector<QStringRef> QString::splitRef(const QRegularExpression &re, SplitBehavior behavior) const
+ return splitString<QVector<QStringRef> >(*this, &QString::midRef, re, behavior);
@@ -6935,15 +7073,18 @@ static ArgEscapeData findArgEscapes(const QString &s)
- if (c->digitValue() == -1)
+ int escape = c->digitValue();
+ if (escape == -1)
- int escape = c->digitValue();
- if (c != uc_end && c->digitValue() != -1) {
- escape = (10 * escape) + c->digitValue();
- ++c;
+ if (c != uc_end) {
+ int next_escape = c->digitValue();
+ if (next_escape != -1) {
+ escape = (10 * escape) + next_escape;
+ ++c;
+ }
if (escape > d.min_escape)
@@ -7720,7 +7861,7 @@ bool QString::isRightToLeft() const
This method is mostly useful to pass a QString to a function
that accepts a std::string object.
- \sa toLatin1(), toUtf8(), toLocal8Bit()
+ \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
@@ -8311,6 +8452,30 @@ QDataStream &operator>>(QDataStream &in, QString &str)
\sa {Implicitly Shared Classes}
+ \typedef QStringRef::size_type
+ \internal
+ \typedef QStringRef::value_type
+ \internal
+ \typedef QStringRef::const_pointer
+ \internal
+ \typedef QStringRef::const_reference
+ \internal
+ \typedef QStringRef::const_iterator
+ \internal
\fn QStringRef::QStringRef()
@@ -8434,6 +8599,36 @@ ownership of it, no memory is freed when instances are destroyed.
+ \fn const QChar *QStringRef::begin() const
+ \since 5.4
+ Same as unicode().
+ \fn const QChar *QStringRef::cbegin() const
+ \since 5.4
+ Same as unicode().
+ \fn const QChar *QStringRef::end() const
+ \since 5.4
+ Returns a pointer to one character past the last one in this string.
+ (It is the same as \c {unicode() + size()}.)
+ \fn const QChar *QStringRef::cend() const
+ \since 5.4
+ Returns a pointer to one character past the last one in this string.
+ (It is the same as \c {unicode() + size()}.)
\fn const QChar *QStringRef::constData() const
Same as unicode().
@@ -8876,19 +9071,19 @@ QStringRef QString::rightRef(int n) const
QStringRef QStringRef::mid(int pos, int n) const
- if (pos > m_size)
+ using namespace QtPrivate;
+ switch (QContainerImplHelper::mid(m_size, &pos, &n)) {
+ case QContainerImplHelper::Null:
return QStringRef();
- if (pos < 0) {
- if (n < 0 || n + pos >= m_size)
- return QStringRef(m_string, m_position, m_size);
- if (n + pos <= 0)
- return QStringRef();
- n += pos;
- pos = 0;
- } else if (uint(n) > uint(m_size - pos)) {
- n = m_size - pos;
+ case QContainerImplHelper::Empty:
+ return QStringRef(m_string, 0, 0);
+ case QContainerImplHelper::Full:
+ return *this;
+ case QContainerImplHelper::Subset:
+ return QStringRef(m_string, pos + m_position, n);
- return QStringRef(m_string, pos + m_position, n);
+ return QStringRef();
@@ -8913,19 +9108,19 @@ QStringRef QStringRef::mid(int pos, int n) const
QStringRef QString::midRef(int position, int n) const
- if (position > d->size)
+ using namespace QtPrivate;
+ switch (QContainerImplHelper::mid(d->size, &position, &n)) {
+ case QContainerImplHelper::Null:
return QStringRef();
- if (position < 0) {
- if (n < 0 || n + position >= d->size)
- return QStringRef(this, 0, d->size);
- if (n + position <= 0)
- return QStringRef();
- n += position;
- position = 0;
- } else if (uint(n) > uint(d->size - position))
- n = d->size - position;
- return QStringRef(this, position, n);
+ case QContainerImplHelper::Empty:
+ return QStringRef(this, 0, 0);
+ case QContainerImplHelper::Full:
+ return QStringRef(this, 0, d->size);
+ case QContainerImplHelper::Subset:
+ return QStringRef(this, position, n);
+ }
+ return QStringRef();