diff options
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 401 |
1 files changed, 298 insertions, 103 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 3b18d31547..33326f141b 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -223,8 +223,15 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size) // we're going to read str[offset..offset+15] (16 bytes) for ( ; str + offset + 15 < e; offset += 16) { - const __m128i nullMask = _mm_set1_epi32(0); const __m128i chunk = _mm_loadu_si128((__m128i*)(str + offset)); // load +#ifdef __AVX2__ + // zero extend to an YMM register + const __m256i extended = _mm256_cvtepu8_epi16(chunk); + + // store + _mm256_storeu_si256((__m256i*)(dst + offset), extended); +#else + const __m128i nullMask = _mm_set1_epi32(0); // unpack the first 8 bytes, padding with zeros const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); @@ -233,6 +240,7 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size) // unpack the last 8 bytes, padding with zeros const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store +#endif } size = size % 16; @@ -540,8 +548,20 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) // and c[offset..offset+15] (16 bytes) for ( ; uc + offset + 15 < e; offset += 16) { // similar to fromLatin1_helper: - // load Latin 1 data and expand to UTF-16 + // load 16 bytes of Latin 1 data __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset)); + +# ifdef __AVX2__ + // expand Latin 1 data via zero extension + __m256i ldata = _mm256_cvtepu8_epi16(chunk); + + // load UTF-16 data and compare + __m256i ucdata = _mm256_loadu_si256((__m256i*)(uc + offset)); + __m256i result = _mm256_cmpeq_epi16(ldata, ucdata); + + uint mask = ~_mm256_movemask_epi8(result); +# else + // expand via unpacking __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask); __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask); @@ -552,6 +572,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2); uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16); +# endif if (mask) { // found a different character uint idx = uint(_bit_scan_forward(mask)); @@ -1306,7 +1327,7 @@ const QString::Null QString::null = { }; Returns a copy of the \a str string. The given string is converted to Unicode using the fromUtf8() function. - \sa fromLatin1(), fromLocal8Bit(), fromUtf8() + \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString() */ /*! \fn QString QString::fromStdWString(const std::wstring &str) @@ -1323,8 +1344,8 @@ const QString::Null QString::null = { }; \since 4.2 Returns a copy of the \a string, where the encoding of \a string depends on - the size of wchar. If wchar is 4 bytes, the \a string is interpreted as ucs-4, - if wchar is 2 bytes it is interpreted as ucs-2. + the size of wchar. If wchar is 4 bytes, the \a string is interpreted as UCS-4, + if wchar is 2 bytes it is interpreted as UTF-16. If \a size is -1 (default), the \a string has to be 0 terminated. @@ -1359,8 +1380,8 @@ int QString::toUcs4_helper(const ushort *uc, int length, uint *out) \since 4.2 Fills the \a array with the data contained in this QString object. - The array is encoded in utf16 on platforms where - wchar_t is 2 bytes wide (e.g. windows) and in ucs4 on platforms + The array is encoded in UTF-16 on platforms where + wchar_t is 2 bytes wide (e.g. windows) and in UCS-4 on platforms where wchar_t is 4 bytes wide (most Unix systems). \a array has to be allocated by the caller and contain enough space to @@ -4087,21 +4108,22 @@ QString QString::right(int n) const QString QString::mid(int position, int n) const { - if (position > d->size) + using namespace QtPrivate; + switch (QContainerImplHelper::mid(d->size, &position, &n)) { + case QContainerImplHelper::Null: return QString(); - if (position < 0) { - if (n < 0 || n + position >= d->size) - return *this; - if (n + position <= 0) - return QString(); - - n += position; - position = 0; - } else if (uint(n) > uint(d->size - position)) - n = d->size - position; - if (position == 0 && n == d->size) + case QContainerImplHelper::Empty: + { + QStringDataPtr empty = { Data::allocate(0) }; + return QString(empty); + } + case QContainerImplHelper::Full: return *this; - return QString((const QChar*) d->data() + position, n); + case QContainerImplHelper::Subset: + return QString((const QChar*)d->data() + position, n); + } + Q_UNREACHABLE(); + return QString(); } /*! @@ -6599,6 +6621,27 @@ QString QString::number(double n, char f, int prec) return s; } +namespace { +template<class ResultList, class StringSource, typename MidMethod, typename Separtor> +static ResultList splitString(const StringSource &source, MidMethod mid, const Separtor &sep, + QString::SplitBehavior behavior, Qt::CaseSensitivity cs, const int separatorSize) +{ + ResultList list; + int start = 0; + int end; + int extra = 0; + while ((end = source.indexOf(sep, start + extra, cs)) != -1) { + if (start != end || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, end - start)); + start = end + separatorSize; + extra = (separatorSize == 0 ? 1 : 0); + } + if (start != source.size() || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, -1)); + return list; +} +} // namespace + /*! Splits the string into substrings wherever \a sep occurs, and returns the list of those strings. If \a sep does not match @@ -6619,40 +6662,102 @@ QString QString::number(double n, char f, int prec) */ QStringList QString::split(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const { - QStringList list; - int start = 0; - int extra = 0; - int end; - while ((end = indexOf(sep, start + extra, cs)) != -1) { - if (start != end || behavior == KeepEmptyParts) - list.append(mid(start, end - start)); - start = end + sep.size(); - extra = (sep.size() == 0 ? 1 : 0); - } - if (start != size() || behavior == KeepEmptyParts) - list.append(mid(start)); - return list; + return splitString<QStringList>(*this, &QString::mid, sep, behavior, cs, sep.size()); } /*! + Splits the string into substring references wherever \a sep occurs, and + returns the list of those strings. If \a sep does not match + anywhere in the string, splitRef() returns a single-element vector + containing this string reference. + + \a cs specifies whether \a sep should be matched case + sensitively or case insensitively. + + If \a behavior is QString::SkipEmptyParts, empty entries don't + appear in the result. By default, empty entries are kept. + + \note All references are valid as long this string is alive. Destroying this + string will cause all references be dangling pointers. + + \since 5.4 + \sa QStringRef split() +*/ +QVector<QStringRef> QString::splitRef(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const +{ + return splitString<QVector<QStringRef> >(*this, &QString::midRef, sep, behavior, cs, sep.size()); +} +/*! \overload */ QStringList QString::split(QChar sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const { - QStringList list; + return splitString<QStringList>(*this, &QString::mid, sep, behavior, cs, 1); +} + +/*! + \overload + \since 5.4 +*/ +QVector<QStringRef> QString::splitRef(QChar sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const +{ + return splitString<QVector<QStringRef> >(*this, &QString::midRef, sep, behavior, cs, 1); +} + +/*! + Splits the string into substrings references wherever \a sep occurs, and + returns the list of those strings. If \a sep does not match + anywhere in the string, split() returns a single-element vector + containing this string reference. + + \a cs specifies whether \a sep should be matched case + sensitively or case insensitively. + + If \a behavior is QString::SkipEmptyParts, empty entries don't + appear in the result. By default, empty entries are kept. + + \note All references are valid as long this string is alive. Destroying this + string will cause all references be dangling pointers. + + \since 5.4 +*/ +QVector<QStringRef> QStringRef::split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const +{ + return splitString<QVector<QStringRef> >(*this, &QStringRef::mid, sep, behavior, cs, sep.size()); +} + +/*! + \overload + \since 5.4 +*/ +QVector<QStringRef> QStringRef::split(QChar sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const +{ + return splitString<QVector<QStringRef> >(*this, &QStringRef::mid, sep, behavior, cs, 1); +} + +#ifndef QT_NO_REGEXP +namespace { +template<class ResultList, typename MidMethod> +static ResultList splitString(const QString &source, MidMethod mid, const QRegExp &rx, QString::SplitBehavior behavior) +{ + QRegExp rx2(rx); + ResultList list; int start = 0; + int extra = 0; int end; - while ((end = indexOf(sep, start, cs)) != -1) { - if (start != end || behavior == KeepEmptyParts) - list.append(mid(start, end - start)); - start = end + 1; + while ((end = rx2.indexIn(source, start + extra)) != -1) { + int matchedLen = rx2.matchedLength(); + if (start != end || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, end - start)); + start = end + matchedLen; + extra = (matchedLen == 0) ? 1 : 0; } - if (start != size() || behavior == KeepEmptyParts) - list.append(mid(start)); + if (start != source.size() || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, -1)); return list; } +} // namespace -#ifndef QT_NO_REGEXP /*! \overload @@ -6681,26 +6786,60 @@ QStringList QString::split(QChar sep, SplitBehavior behavior, Qt::CaseSensitivit */ QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const { - QRegExp rx2(rx); - QStringList list; - int start = 0; - int extra = 0; - int end; - while ((end = rx2.indexIn(*this, start + extra)) != -1) { - int matchedLen = rx2.matchedLength(); - if (start != end || behavior == KeepEmptyParts) - list.append(mid(start, end - start)); - start = end + matchedLen; - extra = (matchedLen == 0) ? 1 : 0; - } - if (start != size() || behavior == KeepEmptyParts) - list.append(mid(start)); - return list; + return splitString<QStringList>(*this, &QString::mid, rx, behavior); +} + +/*! + \overload + \since 5.4 + + Splits the string into substring references wherever the regular expression + \a rx matches, and returns the list of those strings. If \a rx + does not match anywhere in the string, splitRef() returns a + single-element vector containing this string reference. + + \note All references are valid as long this string is alive. Destroying this + string will cause all references be dangling pointers. + + \sa QStringRef split() +*/ +QVector<QStringRef> QString::splitRef(const QRegExp &rx, SplitBehavior behavior) const +{ + return splitString<QVector<QStringRef> >(*this, &QString::midRef, rx, behavior); } #endif #ifndef QT_NO_REGULAREXPRESSION #ifndef QT_BOOTSTRAPPED +namespace { +template<class ResultList, typename MidMethod> +static ResultList splitString(const QString &source, MidMethod mid, const QRegularExpression &re, + QString::SplitBehavior behavior) +{ + ResultList list; + if (!re.isValid()) { + qWarning("QString::split: invalid QRegularExpression object"); + return list; + } + + int start = 0; + int end = 0; + QRegularExpressionMatchIterator iterator = re.globalMatch(source); + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + end = match.capturedStart(); + if (start != end || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, end - start)); + start = match.capturedEnd(); + } + + if (start != source.size() || behavior == QString::KeepEmptyParts) + list.append((source.*mid)(start, -1)); + + return list; +} +} // namespace + /*! \overload \since 5.0 @@ -6730,27 +6869,26 @@ QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const */ QStringList QString::split(const QRegularExpression &re, SplitBehavior behavior) const { - QStringList list; - if (!re.isValid()) { - qWarning("QString::split: invalid QRegularExpression object"); - return list; - } + return splitString<QStringList>(*this, &QString::mid, re, behavior); +} - int start = 0; - int end = 0; - QRegularExpressionMatchIterator iterator = re.globalMatch(*this); - while (iterator.hasNext()) { - QRegularExpressionMatch match = iterator.next(); - end = match.capturedStart(); - if (start != end || behavior == KeepEmptyParts) - list.append(mid(start, end - start)); - start = match.capturedEnd(); - } +/*! + \overload + \since 5.4 - if (start != size() || behavior == KeepEmptyParts) - list.append(mid(start)); + Splits the string into substring references wherever the regular expression + \a re matches, and returns the list of those strings. If \a re + does not match anywhere in the string, splitRef() returns a + single-element vector containing this string reference. - return list; + \note All references are valid as long this string is alive. Destroying this + string will cause all references be dangling pointers. + + \sa split() QStringRef +*/ +QVector<QStringRef> QString::splitRef(const QRegularExpression &re, SplitBehavior behavior) const +{ + return splitString<QVector<QStringRef> >(*this, &QString::midRef, re, behavior); } #endif // QT_BOOTSTRAPPED #endif // QT_NO_REGULAREXPRESSION @@ -6935,15 +7073,18 @@ static ArgEscapeData findArgEscapes(const QString &s) break; } - if (c->digitValue() == -1) + int escape = c->digitValue(); + if (escape == -1) continue; - int escape = c->digitValue(); ++c; - if (c != uc_end && c->digitValue() != -1) { - escape = (10 * escape) + c->digitValue(); - ++c; + if (c != uc_end) { + int next_escape = c->digitValue(); + if (next_escape != -1) { + escape = (10 * escape) + next_escape; + ++c; + } } if (escape > d.min_escape) @@ -7720,7 +7861,7 @@ bool QString::isRightToLeft() const This method is mostly useful to pass a QString to a function that accepts a std::string object. - \sa toLatin1(), toUtf8(), toLocal8Bit() + \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString() */ /*! @@ -8311,6 +8452,30 @@ QDataStream &operator>>(QDataStream &in, QString &str) \sa {Implicitly Shared Classes} */ +/*! + \typedef QStringRef::size_type + \internal +*/ + +/*! + \typedef QStringRef::value_type + \internal +*/ + +/*! + \typedef QStringRef::const_pointer + \internal +*/ + +/*! + \typedef QStringRef::const_reference + \internal +*/ + +/*! + \typedef QStringRef::const_iterator + \internal +*/ /*! \fn QStringRef::QStringRef() @@ -8434,6 +8599,36 @@ ownership of it, no memory is freed when instances are destroyed. */ /*! + \fn const QChar *QStringRef::begin() const + \since 5.4 + + Same as unicode(). +*/ + +/*! + \fn const QChar *QStringRef::cbegin() const + \since 5.4 + + Same as unicode(). +*/ + +/*! + \fn const QChar *QStringRef::end() const + \since 5.4 + + Returns a pointer to one character past the last one in this string. + (It is the same as \c {unicode() + size()}.) +*/ + +/*! + \fn const QChar *QStringRef::cend() const + \since 5.4 + + Returns a pointer to one character past the last one in this string. + (It is the same as \c {unicode() + size()}.) +*/ + +/*! \fn const QChar *QStringRef::constData() const Same as unicode(). @@ -8876,19 +9071,19 @@ QStringRef QString::rightRef(int n) const */ QStringRef QStringRef::mid(int pos, int n) const { - if (pos > m_size) + using namespace QtPrivate; + switch (QContainerImplHelper::mid(m_size, &pos, &n)) { + case QContainerImplHelper::Null: return QStringRef(); - if (pos < 0) { - if (n < 0 || n + pos >= m_size) - return QStringRef(m_string, m_position, m_size); - if (n + pos <= 0) - return QStringRef(); - n += pos; - pos = 0; - } else if (uint(n) > uint(m_size - pos)) { - n = m_size - pos; + case QContainerImplHelper::Empty: + return QStringRef(m_string, 0, 0); + case QContainerImplHelper::Full: + return *this; + case QContainerImplHelper::Subset: + return QStringRef(m_string, pos + m_position, n); } - return QStringRef(m_string, pos + m_position, n); + Q_UNREACHABLE(); + return QStringRef(); } /*! @@ -8913,19 +9108,19 @@ QStringRef QStringRef::mid(int pos, int n) const */ QStringRef QString::midRef(int position, int n) const { - if (position > d->size) + using namespace QtPrivate; + switch (QContainerImplHelper::mid(d->size, &position, &n)) { + case QContainerImplHelper::Null: return QStringRef(); - if (position < 0) { - if (n < 0 || n + position >= d->size) - return QStringRef(this, 0, d->size); - if (n + position <= 0) - return QStringRef(); - - n += position; - position = 0; - } else if (uint(n) > uint(d->size - position)) - n = d->size - position; - return QStringRef(this, position, n); + case QContainerImplHelper::Empty: + return QStringRef(this, 0, 0); + case QContainerImplHelper::Full: + return QStringRef(this, 0, d->size); + case QContainerImplHelper::Subset: + return QStringRef(this, position, n); + } + Q_UNREACHABLE(); + return QStringRef(); } /*! |