diff options
author | Marc Mutz <marc.mutz@kdab.com> | 2020-05-10 23:18:18 +0200 |
---|---|---|
committer | Marc Mutz <marc.mutz@kdab.com> | 2020-05-19 10:15:38 +0200 |
commit | 2259a433ca341129253ceb69bbe9eefc546282a3 (patch) | |
tree | 662ed56adc65ae8ef7fdf858d200483a19822fb7 | |
parent | 60ec350bc79d562c0611bc31e8655729272c31d6 (diff) |
QString: throughly port internals to char16_t
This includes allocating QString data as char16_t instead of ushort.
This isn't the end of the port, but an important milestone: the
traditional foldChar() functions are now all unused.
Change-Id: I766bebc2d70b6972e2045d3474c7f5770f4676d9
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
-rw-r--r-- | src/corelib/text/qchar.cpp | 25 | ||||
-rw-r--r-- | src/corelib/text/qstring.cpp | 151 | ||||
-rw-r--r-- | src/corelib/text/qstring.h | 2 | ||||
-rw-r--r-- | src/corelib/text/qstringliteral.h | 6 |
4 files changed, 76 insertions, 108 deletions
diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp index 358cc82f22..b7f78c3f5d 100644 --- a/src/corelib/text/qchar.cpp +++ b/src/corelib/text/qchar.cpp @@ -1703,15 +1703,6 @@ char32_t QChar::toTitleCase(char32_t ucs4) noexcept return convertCase_helper(ucs4, QUnicodeTables::TitleCase); } -[[deprecated]] -static inline uint foldCase(const ushort *ch, const ushort *start) -{ - char32_t ucs4 = *ch; - if (QChar::isLowSurrogate(ucs4) && ch > start && QChar::isHighSurrogate(*(ch - 1))) - ucs4 = QChar::surrogateToUcs4(*(ch - 1), ucs4); - return convertCase_helper(ucs4, QUnicodeTables::CaseFold); -} - static inline char32_t foldCase(const char16_t *ch, const char16_t *start) { char32_t ucs4 = *ch; @@ -1720,16 +1711,6 @@ static inline char32_t foldCase(const char16_t *ch, const char16_t *start) return convertCase_helper(ucs4, QUnicodeTables::CaseFold); } -[[deprecated]] -static inline uint foldCase(uint ch, uint &last) noexcept -{ - char32_t ucs4 = ch; - if (QChar::isLowSurrogate(ucs4) && QChar::isHighSurrogate(last)) - ucs4 = QChar::surrogateToUcs4(last, ucs4); - last = ch; - return convertCase_helper(ucs4, QUnicodeTables::CaseFold); -} - static inline char32_t foldCase(char32_t ch, char32_t &last) noexcept { char32_t ucs4 = ch; @@ -1739,12 +1720,6 @@ static inline char32_t foldCase(char32_t ch, char32_t &last) noexcept return convertCase_helper(ucs4, QUnicodeTables::CaseFold); } -[[deprecated]] -static inline ushort foldCase(ushort ch) noexcept -{ - return convertCase_helper(char16_t{ch}, QUnicodeTables::CaseFold); -} - static inline char16_t foldCase(char16_t ch) noexcept { return convertCase_helper(ch, QUnicodeTables::CaseFold); diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 1765ca0aa1..57be0eabcf 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -100,9 +100,10 @@ QT_BEGIN_NAMESPACE -namespace { -// temporary; to easy porting to char16_t -char16_t *to_utf16(ushort *p) { return reinterpret_cast<char16_t *>(p); } +template <typename T, typename Cmp = std::less<>> +static constexpr bool points_into_range(const T *p, const T *b, const T *e, Cmp less = {}) noexcept +{ + return !less(p, b) && less(p, e); } /* @@ -847,8 +848,8 @@ static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QCha if (be - b < ae - a) e = a + (be - b); - uint alast = 0; - uint blast = 0; + char32_t alast = 0; + char32_t blast = 0; while (a < e) { // qDebug() << Qt::hex << alast << blast; // qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast); @@ -891,8 +892,8 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char #if defined(__mips_dsp) // From qstring_mips_dsp_asm.S -extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a, - const ushort *b, +extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a, + const char16_t *b, unsigned len); #endif @@ -912,8 +913,8 @@ static int ucstrncmp(const QChar *a, const QChar *b, size_t l) #if defined(__mips_dsp) Q_STATIC_ASSERT(sizeof(uint) == sizeof(size_t)); if (l >= 8) { - return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const ushort*>(a), - reinterpret_cast<const ushort*>(b), + return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const char16_t*>(a), + reinterpret_cast<const char16_t*>(b), l); } #endif // __mips_dsp @@ -1061,8 +1062,8 @@ static int ucstrncmp(const QChar *a, const QChar *b, size_t l) static int ucstrncmp(const QChar *a, const uchar *c, size_t l) { - const ushort *uc = reinterpret_cast<const ushort *>(a); - const ushort *e = uc + l; + const char16_t *uc = reinterpret_cast<const char16_t *>(a); + const char16_t *e = uc + l; #ifdef __SSE2__ __m128i nullmask = _mm_setzero_si128(); @@ -1153,7 +1154,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, size_t l) c += offset; # if !defined(__OPTIMIZE_SIZE__) - const auto lambda = [=](size_t i) { return uc[i] - ushort(c[i]); }; + const auto lambda = [=](size_t i) { return uc[i] - char16_t(c[i]); }; return UnrollTailLoop<MaxTailLength>::exec(e - uc, 0, lambda, lambda); # endif #endif @@ -2115,9 +2116,9 @@ QString::QString(int size, QChar ch) } else { d = DataPointer(Data::allocate(size + 1), size); d.data()[size] = '\0'; - ushort *i = d.data() + size; - ushort *b = d.data(); - const ushort value = ch.unicode(); + char16_t *i = d.data() + size; + char16_t *b = d.data(); + const char16_t value = ch.unicode(); while (i != b) *--i = value; } @@ -2387,7 +2388,7 @@ QString &QString::operator=(QLatin1String other) if (isDetached() && other.size() <= capacity()) { // assumes d->alloc == 0 -> !isDetached() (sharedNull) d.size = other.size(); d.data()[other.size()] = 0; - qt_from_latin1(to_utf16(d.data()), other.latin1(), other.size()); + qt_from_latin1(d.data(), other.latin1(), other.size()); } else { *this = fromLatin1(other.latin1(), other.size()); } @@ -2541,7 +2542,7 @@ QString &QString::insert(int i, QLatin1String str) resize(size() + len); ::memmove(d.data() + i + len, d.data() + i, (d.size - i - len) * sizeof(QChar)); - qt_from_latin1(to_utf16(d.data() + i), s, uint(len)); + qt_from_latin1(d.data() + i, s, uint(len)); return *this; } @@ -2557,14 +2558,9 @@ QString& QString::insert(int i, const QChar *unicode, int size) if (i < 0 || size <= 0) return *this; - const ushort *s = (const ushort *)unicode; - const std::less<const ushort*> less; - if (!less(s, d.data()) && less(s, d.data() + d.size)) { - // Part of me - take a copy - const QVarLengthArray<ushort> copy(s, s + size); - insert(i, reinterpret_cast<const QChar *>(copy.data()), size); - return *this; - } + const auto s = reinterpret_cast<const char16_t *>(unicode); + if (points_into_range(s, d.data(), d.data() + d.size)) + return insert(i, QStringView{QVarLengthArray(s, s + size)}); if (Q_UNLIKELY(i > int(d.size))) resize(i + size, QLatin1Char(' ')); @@ -2662,7 +2658,7 @@ QString &QString::append(QLatin1String str) int len = str.size(); if (d->needsDetach() || size() + len > capacity()) reallocData(uint(size() + len) + 1u, true); - char16_t *i = to_utf16(d.data() + d.size); + char16_t *i = d.data() + d.size; qt_from_latin1(i, s, uint(len)); i[len] = '\0'; d.size += len; @@ -2819,7 +2815,7 @@ QString &QString::remove(int pos, int len) } else if (len > 0) { detach(); memmove(d.data() + pos, d.data() + pos + len, - (d.size - pos - len + 1) * sizeof(ushort)); + (d.size - pos - len + 1) * sizeof(QChar)); d.size -= len; } return *this; @@ -2867,15 +2863,11 @@ static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs */ QString &QString::remove(const QString &str, Qt::CaseSensitivity cs) { - const auto s = reinterpret_cast<const ushort *>(str.data()); - const std::less<const ushort *> less; - if (!less(s, d.data()) && less(s, d.data() + d.size)) { - // Part of me - take a copy - const QVarLengthArray<ushort> copy(s, s + str.size()); - removeStringImpl(*this, QStringView{copy.data(), copy.size()}, cs); - } else { + const auto s = str.d.data(); + if (points_into_range(s, d.data(), d.data() + d.size)) + removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs); + else removeStringImpl(*this, qToStringViewIgnoringNull(str), cs); - } return *this; } @@ -3031,7 +3023,7 @@ QChar *textCopy(const QChar *start, int len) return copy; } -bool pointsIntoRange(const QChar *ptr, const ushort *base, int len) +static bool pointsIntoRange(const QChar *ptr, const char16_t *base, qsizetype len) { const QChar *const start = reinterpret_cast<const QChar *>(base); const std::less<const QChar *> less; @@ -3196,7 +3188,7 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs if (size() == 0) return *this; - ushort cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode()); + char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode()); int index = 0; while (1) { @@ -3242,19 +3234,19 @@ QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs) const int idx = indexOf(before, 0, cs); if (idx != -1) { detach(); - const ushort a = after.unicode(); - ushort *i = d.data(); - ushort *const e = i + d.size; + const char16_t a = after.unicode(); + char16_t *i = d.data(); + char16_t *const e = i + d.size; i += idx; *i = a; if (cs == Qt::CaseSensitive) { - const ushort b = before.unicode(); + const char16_t b = before.unicode(); while (++i != e) { if (*i == b) *i = a; } } else { - const ushort b = foldCase(before.unicode()); + const char16_t b = foldCase(before.unicode()); while (++i != e) { if (foldCase(*i) == b) *i = a; @@ -4563,7 +4555,7 @@ QString QString::mid(int position, int n) const return QString(); case QContainerImplHelper::Empty: { - QPair<Data *, ushort *> pair = Data::allocate(0); + QPair<Data *, char16_t *> pair = Data::allocate(0); DataPointer empty = { pair.first, pair.second, 0 }; return QString(empty); } @@ -4820,7 +4812,7 @@ QByteArray QString::toLatin1_helper_inplace(QString &s) // We can return our own buffer to the caller. // Conversion to Latin-1 always shrinks the buffer by half. - const char16_t *data = to_utf16(s.d.data()); + const char16_t *data = s.d.data(); int length = s.d.size; // Move the d pointer over to the bytearray. @@ -5016,7 +5008,7 @@ QString::DataPointer QString::fromLatin1_helper(const char *str, int size) size = qstrlen(str); d = DataPointer(Data::allocate(size + 1), size); d.data()[size] = '\0'; - char16_t *dst = to_utf16(d.data()); + char16_t *dst = d.data(); qt_from_latin1(dst, str, uint(size)); } @@ -5066,7 +5058,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size) if (!str) return QString(); if (size == 0 || (!*str && size < 0)) { - QPair<Data *, ushort *> pair = Data::allocate(0); + QPair<Data *, char16_t *> pair = Data::allocate(0); QString::DataPointer empty = { pair.first, pair.second, 0 }; return QString(empty); } @@ -6018,7 +6010,7 @@ const ushort *QString::utf16() const // ensure '\0'-termination for ::fromRawData strings const_cast<QString*>(this)->reallocData(uint(d.size) + 1u); } - return d.data(); + return reinterpret_cast<const ushort *>(d.data()); } /*! @@ -6570,7 +6562,7 @@ QString QString::vasprintf(const char *cformat, va_list ap) } case 'c': { if (length_mod == lm_l) - subst = QChar((ushort) va_arg(ap, int)); + subst = QChar::fromUcs2(va_arg(ap, int)); else subst = QLatin1Char((uchar) va_arg(ap, int)); ++c; @@ -7548,18 +7540,18 @@ QString QString::repeated(int times) const if (result.capacity() != resultSize) return QString(); // not enough memory - memcpy(result.d.data(), d.data(), d.size * sizeof(ushort)); + memcpy(result.d.data(), d.data(), d.size * sizeof(QChar)); int sizeSoFar = d.size; - ushort *end = result.d.data() + sizeSoFar; + char16_t *end = result.d.data() + sizeSoFar; const int halfResultSize = resultSize >> 1; while (sizeSoFar <= halfResultSize) { - memcpy(end, result.d.data(), sizeSoFar * sizeof(ushort)); + memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar)); end += sizeSoFar; sizeSoFar <<= 1; } - memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(ushort)); + memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar)); result.d.data()[resultSize] = '\0'; result.d.size = resultSize; return result; @@ -7583,10 +7575,10 @@ void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar:: if (n.version > version) { int pos = from; if (QChar::requiresSurrogates(n.ucs4)) { - ushort ucs4High = QChar::highSurrogate(n.ucs4); - ushort ucs4Low = QChar::lowSurrogate(n.ucs4); - ushort oldHigh = QChar::highSurrogate(n.old_mapping); - ushort oldLow = QChar::lowSurrogate(n.old_mapping); + char16_t ucs4High = QChar::highSurrogate(n.ucs4); + char16_t ucs4Low = QChar::lowSurrogate(n.ucs4); + char16_t oldHigh = QChar::highSurrogate(n.old_mapping); + char16_t oldLow = QChar::lowSurrogate(n.old_mapping); while (pos < s.length() - 1) { if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) { if (!d) @@ -8310,8 +8302,8 @@ QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillCha return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); } -static inline ushort to_unicode(const QChar c) { return c.unicode(); } -static inline ushort to_unicode(const char c) { return QLatin1Char{c}.unicode(); } +static inline char16_t to_unicode(const QChar c) { return c.unicode(); } +static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); } template <typename Char> static int getEscape(const Char *uc, qsizetype *pos, qsizetype len, int maxNumber = 999) @@ -8557,10 +8549,10 @@ QString QtPrivate::argToQString(QLatin1String pattern, size_t n, const ArgBase * */ bool QString::isSimpleText() const { - const ushort *p = d.data(); - const ushort * const end = p + d.size; + const char16_t *p = d.data(); + const char16_t * const end = p + d.size; while (p < end) { - ushort uc = *p; + char16_t uc = *p; // sort out regions of complex text formatting if (uc > 0x058f && (uc < 0x1100 || uc > 0xfb0f)) { return false; @@ -8727,7 +8719,7 @@ QString QString::fromRawData(const QChar *unicode, int size) } else if (!size) { x = DataPointer(Data::allocate(0), 0); } else { - x = Data::fromRawData(reinterpret_cast<const ushort *>(unicode), size); + x = Data::fromRawData(reinterpret_cast<const char16_t *>(unicode), size); } return QString(x); } @@ -9680,9 +9672,9 @@ QDataStream &operator<<(QDataStream &out, const QString &str) if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) { out.writeBytes(reinterpret_cast<const char *>(str.unicode()), uint(sizeof(QChar) * str.length())); } else { - QVarLengthArray<ushort> buffer(str.length()); - qbswap<sizeof(ushort)>(str.constData(), str.length(), buffer.data()); - out.writeBytes(reinterpret_cast<const char *>(buffer.data()), uint(sizeof(ushort) * buffer.size())); + QVarLengthArray<char16_t> buffer(str.length()); + qbswap<sizeof(char16_t)>(str.constData(), str.length(), buffer.data()); + out.writeBytes(reinterpret_cast<const char *>(buffer.data()), uint(sizeof(char16_t) * buffer.size())); } } else { // write null marker @@ -9737,7 +9729,7 @@ QDataStream &operator>>(QDataStream &in, QString &str) if ((in.byteOrder() == QDataStream::BigEndian) != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) { - ushort *data = reinterpret_cast<ushort *>(str.data()); + char16_t *data = reinterpret_cast<char16_t *>(str.data()); qbswap<sizeof(*data)>(data, len, data); } } else { @@ -11009,13 +11001,13 @@ bool QStringRef::isRightToLeft() const */ bool QtPrivate::isRightToLeft(QStringView string) noexcept { - const ushort *p = reinterpret_cast<const ushort*>(string.data()); - const ushort * const end = p + string.size(); + const char16_t *p = string.utf16(); + const char16_t * const end = p + string.size(); int isolateLevel = 0; while (p < end) { uint ucs4 = *p; if (QChar::isHighSurrogate(ucs4) && p < end - 1) { - ushort low = p[1]; + char16_t low = p[1]; if (QChar::isLowSurrogate(low)) { ucs4 = QChar::surrogateToUcs4(ucs4, low); ++p; @@ -11418,33 +11410,34 @@ bool QtPrivate::endsWith(QLatin1String haystack, QLatin1String needle, Qt::CaseS namespace { template <typename Pointer> -uint foldCaseHelper(Pointer ch, Pointer start) = delete; +char32_t foldCaseHelper(Pointer ch, Pointer start) = delete; template <> -uint foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start) +char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start) { - return foldCase(reinterpret_cast<const ushort*>(ch), reinterpret_cast<const ushort*>(start)); + return foldCase(reinterpret_cast<const char16_t*>(ch), + reinterpret_cast<const char16_t*>(start)); } template <> -uint foldCaseHelper<const char*>(const char* ch, const char*) +char32_t foldCaseHelper<const char*>(const char* ch, const char*) { - return foldCase(ushort(uchar(*ch))); + return foldCase(char16_t(uchar(*ch))); } template <typename T> -ushort valueTypeToUtf16(T t) = delete; +char16_t valueTypeToUtf16(T t) = delete; template <> -ushort valueTypeToUtf16<QChar>(QChar t) +char16_t valueTypeToUtf16<QChar>(QChar t) { return t.unicode(); } template <> -ushort valueTypeToUtf16<char>(char t) +char16_t valueTypeToUtf16<char>(char t) { - return ushort(uchar(t)); + return char16_t{uchar(t)}; } } @@ -11565,7 +11558,7 @@ static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle, if (std::size_t(from) >= std::size_t(haystack.size())) return -1; if (from >= 0) { - ushort c = needle.unicode(); + char16_t c = needle.unicode(); const auto b = haystack.data(); auto n = b + from; if (cs == Qt::CaseSensitive) { diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h index 733d796e81..801d16a853 100644 --- a/src/corelib/text/qstring.h +++ b/src/corelib/text/qstring.h @@ -254,7 +254,7 @@ qsizetype QStringView::lastIndexOf(QLatin1String s, qsizetype from, Qt::CaseSens class Q_CORE_EXPORT QString { - typedef QTypedArrayData<ushort> Data; + typedef QTypedArrayData<char16_t> Data; public: typedef QStringPrivate DataPointer; diff --git a/src/corelib/text/qstringliteral.h b/src/corelib/text/qstringliteral.h index c3b274ca90..0968a9531a 100644 --- a/src/corelib/text/qstringliteral.h +++ b/src/corelib/text/qstringliteral.h @@ -68,15 +68,15 @@ Q_STATIC_ASSERT_X(sizeof(qunicodechar) == 2, Q_BASIC_ATOMIC_INITIALIZER(-1), QArrayData::StaticDataFlags, 0 \ }; \ QStringPrivate holder = { \ - static_cast<QTypedArrayData<ushort> *>(const_cast<QArrayData *>(&qstring_literal)), \ - reinterpret_cast<ushort *>(const_cast<qunicodechar *>(QT_UNICODE_LITERAL(str))), \ + static_cast<QTypedArrayData<char16_t> *>(const_cast<QArrayData *>(&qstring_literal)), \ + const_cast<qunicodechar *>(QT_UNICODE_LITERAL(str)), \ Size \ }; \ return QString(holder); \ }()) \ /**/ -using QStringPrivate = QArrayDataPointer<ushort>; +using QStringPrivate = QArrayDataPointer<char16_t>; QT_END_NAMESPACE |