diff options
-rw-r--r-- | src/corelib/text/qstring.cpp | 4 | ||||
-rw-r--r-- | src/corelib/text/qstring.h | 9 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.h | 60 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstring/tst_qstring.cpp | 78 |
4 files changed, 150 insertions, 1 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 81891e3cdb..a56d2c064c 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -3364,6 +3364,10 @@ QString &QString::append(QChar ch) \list \li QChar \li QLatin1Char + \li \c {char} + \li \c {unsigned char} + \li \c {signed char} + \li \c {char8_t} \li \c char16_t \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t \li \c char32_t diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h index c91dfe1850..078abda361 100644 --- a/src/corelib/text/qstring.h +++ b/src/corelib/text/qstring.h @@ -141,6 +141,7 @@ class Q_CORE_EXPORT QString using is_compatible_char_helper = std::disjunction< QtPrivate::IsCompatibleCharType<Char>, QtPrivate::IsCompatibleChar32Type<Char>, + QtPrivate::IsCompatibleChar8Type<Char>, std::is_same<Char, QLatin1Char> // special case >; @@ -451,6 +452,10 @@ public: ++first; } return *this; + } else if constexpr (QtPrivate::IsCompatibleChar8Type<V>::value) { + assign_helper_char8(first, last); + d.data()[d.size] = u'\0'; + return *this; } else { d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); }); d.data()[d.size] = u'\0'; @@ -936,6 +941,9 @@ private: void reallocGrowData(qsizetype n); // ### remove once QAnyStringView supports UTF-32: QString &assign_helper(const char32_t *data, qsizetype len); + // Defined in qstringconverter.h + template <typename InputIterator> + void assign_helper_char8(InputIterator first, InputIterator last); static int compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; @@ -1512,6 +1520,7 @@ inline QString operator""_qs(const char16_t *str, size_t size) noexcept QT_END_NAMESPACE #include <QtCore/qstringbuilder.h> +#include <QtCore/qstringconverter.h> #ifdef Q_L1S_VIEW_IS_PRIMARY # undef Q_L1S_VIEW_IS_PRIMARY diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h index e12516966a..148501288c 100644 --- a/src/corelib/text/qstringconverter.h +++ b/src/corelib/text/qstringconverter.h @@ -211,6 +211,66 @@ QByteArray &operator+=(QByteArray &a, const QStringEncoder::DecodedData<T> &b) } #endif +template <typename InputIterator> +void QString::assign_helper_char8(InputIterator first, InputIterator last) +{ + static_assert(!QString::is_contiguous_iterator_v<InputIterator>, + "Internal error: Should have been handed over to the QAnyStringView overload." + ); + + using ValueType = typename std::iterator_traits<InputIterator>::value_type; + constexpr bool IsFwdIt = std::is_convertible_v< + typename std::iterator_traits<InputIterator>::iterator_category, + std::forward_iterator_tag + >; + + resize(0); + // In case of not being shared, there is the possibility of having free space at begin + // even after the resize to zero. + if (const auto offset = d.freeSpaceAtBegin()) + d.setBegin(d.begin() - offset); + + if constexpr (IsFwdIt) + reserve(static_cast<qsizetype>(std::distance(first, last))); + + auto toUtf16 = QStringDecoder(QStringDecoder::Utf8); + auto availableCapacity = d.constAllocatedCapacity(); + auto *dst = d.data(); + auto *dend = d.data() + availableCapacity; + + while (true) { + if (first == last) { // ran out of input elements + Q_ASSERT(!std::less<>{}(dend, dst)); + d.size = dst - d.begin(); + return; + } + const ValueType next = *first; // decays proxies, if any + const auto chunk = QUtf8StringView(&next, 1); + // UTF-8 characters can have a maximum size of 4 bytes and may result in a surrogate + // pair of UTF-16 code units. In the input-iterator case, we don't know the size + // and would need to always reserve space for 2 code units. To keep our promise + // of 'not allocating if it fits', we have to pre-check this condition. + // We know that it fits in the forward-iterator case. + if constexpr (!IsFwdIt) { + constexpr qsizetype Pair = 2; + char16_t buf[Pair]; + const qptrdiff n = toUtf16.appendToBuffer(buf, chunk) - buf; + if (dend - dst < n) { // ran out of allocated memory + const auto offset = dst - d.begin(); + reallocData(d.constAllocatedCapacity() + Pair, QArrayData::Grow); + // update the pointers since we've re-allocated + availableCapacity = d.constAllocatedCapacity(); + dst = d.data() + offset; + dend = d.data() + availableCapacity; + } + dst = std::copy_n(buf, n, dst); + } else { // take the fast path + dst = toUtf16.appendToBuffer(dst, chunk); + } + ++first; + } +} + QT_END_NAMESPACE #endif diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index 2d03fb9d7c..77fb85d80f 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -3467,6 +3467,37 @@ void tst_QString::assign() QCOMPARE_EQ(str.capacity(), oldCap); QCOMPARE_EQ(str.size(), 0); +#ifndef QT_NO_CAST_FROM_ASCII + const char c8[] = "a©☻🂤"; // [1, 2, 3, 4] bytes in utf-8 code points + str.assign(std::begin(c8), std::end(c8) - 1); + QCOMPARE(str, c8); + + std::string c8str(c8); + str.assign(c8str.begin(), c8str.end()); + QCOMPARE(str, c8); + QCOMPARE(str.capacity(), qsizetype(std::size(c8) - 1)); + + oldCap = str.capacity(); + str.assign(c8str.begin(), c8str.begin()); // empty range + QCOMPARE_EQ(str.capacity(), oldCap); + QCOMPARE_EQ(str.size(), 0); + + std::forward_list<char> fwd(std::begin(c8), std::end(c8) - 1); + str.assign(fwd.begin(), fwd.end()); + QCOMPARE(str, c8); +#endif +#ifdef __cpp_char8_t + const char8_t c8t[] = u8"🂤🂤🂤🂤🂤🂤🂤🂤🂤🂤"; // 10 x 4 bytes in utf-8 code points + str.assign(std::begin(c8t), std::end(c8t) - 1); + QCOMPARE(str, c8t); + QCOMPARE(str.size(), 20); +#endif +#ifdef __cpp_lib_char8_t + std::u8string c8tstr(c8t); + str.assign(c8tstr.begin(), c8tstr.end()); + QCOMPARE(str, c8t); +#endif + const char16_t c16[] = u"٩(⁎❛ᴗ❛⁎)۶ 🤷"; str.assign(std::begin(c16), std::end(c16) - 1); QCOMPARE(str, c16); @@ -3516,6 +3547,51 @@ void tst_QString::assign() str.assign(std::istream_iterator<ushort>{}, std::istream_iterator<ushort>{}); // empty range QCOMPARE_EQ(str.capacity(), oldCap); QCOMPARE_EQ(str.size(), 0); + +#ifndef QT_NO_CAST_FROM_ASCII + str.resize(0); + str.squeeze(); + str.reserve(5); + const char c8cmp[] = "🂤🂤a"; // 2 + 2 + 1 byte + ss.clear(); + ss.str(c8cmp); + str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{}); + QCOMPARE(str, c8cmp); + QCOMPARE(str.size(), 5); + QCOMPARE(str.capacity(), 5); + + // 1 code-point + ill-formed sequence + 1 code-point. + const char c8IllFormed[] = "a\xe0\x9f\x80""a"; + ss.clear(); + ss.str(c8IllFormed); + str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{}); + QEXPECT_FAIL("", "Iconsistent handling of ill-formed sequences, QTBUG-117051", Continue); + QCOMPARE_EQ(str, QString(c8IllFormed)); + + const char c82[] = "ÌşṫһíᶊśꞧɨℼṩuDF49ïľι?"; + ss.clear(); + ss.str(c82); + str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{}); + QCOMPARE(str, c82); + + const char uc8[] = "ẵƽ𝔰ȉ𝚐ꞑ𝒾𝝿𝕘"; + ss.clear(); + ss.str(uc8); + str.assign(std::istream_iterator<uchar>{ss}, std::istream_iterator<uchar>{}); + QCOMPARE(str, uc8); + + ss.clear(); + const char sc8[] = "𓁇ख़ॵ௵"; + ss.str(sc8); + str.assign(std::istream_iterator<signed char>{ss}, std::istream_iterator<signed char>{}); + QCOMPARE(str, sc8); + + oldCap = str.capacity(); + str.assign(std::istream_iterator<signed char>{}, // empty range + std::istream_iterator<signed char>{}); + QCOMPARE_EQ(str.capacity(), oldCap); + QCOMPARE_EQ(str.size(), 0); +#endif } // Test chaining { @@ -3634,7 +3710,7 @@ void tst_QString::assign_uses_prepend_buffer() for (qsizetype i = 0; i < withFreeSpaceAtBegin.d.freeSpaceAtBegin(); ++i) ss << "d "; - withFreeSpaceAtBegin.assign(std::istream_iterator<ushort>{ss}, std::istream_iterator<ushort>{}); + withFreeSpaceAtBegin.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{}); QCOMPARE_EQ(withFreeSpaceAtBegin.d.freeSpaceAtBegin(), 0); // we used the prepend buffer QCOMPARE_EQ(capBegin(withFreeSpaceAtBegin), oldCapBegin); QCOMPARE_EQ(capEnd(withFreeSpaceAtBegin), oldCapEnd); |