summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/corelib/text/qstring.cpp 4
-rw-r--r-- src/corelib/text/qstring.h 9
-rw-r--r-- src/corelib/text/qstringconverter.h 60
-rw-r--r-- tests/auto/corelib/text/qstring/tst_qstring.cpp 78
4 files changed, 150 insertions, 1 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 81891e3cdb..a56d2c064c 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -3364,6 +3364,10 @@ QString &QString::append(QChar ch)
\list
\li QChar
\li QLatin1Char
+ \li \c {char}
+ \li \c {unsigned char}
+ \li \c {signed char}
+ \li \c {char8_t}
\li \c char16_t
\li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
\li \c char32_t
diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index c91dfe1850..078abda361 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -141,6 +141,7 @@ class Q_CORE_EXPORT QString
using is_compatible_char_helper = std::disjunction<
QtPrivate::IsCompatibleCharType<Char>,
QtPrivate::IsCompatibleChar32Type<Char>,
+ QtPrivate::IsCompatibleChar8Type<Char>,
std::is_same<Char, QLatin1Char> // special case
>;
@@ -451,6 +452,10 @@ public:
++first;
}
return *this;
+ } else if constexpr (QtPrivate::IsCompatibleChar8Type<V>::value) {
+ assign_helper_char8(first, last);
+ d.data()[d.size] = u'\0';
+ return *this;
} else {
d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); });
d.data()[d.size] = u'\0';
@@ -936,6 +941,9 @@ private:
void reallocGrowData(qsizetype n);
// ### remove once QAnyStringView supports UTF-32:
QString &assign_helper(const char32_t *data, qsizetype len);
+ // Defined in qstringconverter.h
+ template <typename InputIterator>
+ void assign_helper_char8(InputIterator first, InputIterator last);
static int compare_helper(const QChar *data1, qsizetype length1,
const QChar *data2, qsizetype length2,
Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
@@ -1512,6 +1520,7 @@ inline QString operator""_qs(const char16_t *str, size_t size) noexcept
QT_END_NAMESPACE
#include <QtCore/qstringbuilder.h>
+#include <QtCore/qstringconverter.h>
#ifdef Q_L1S_VIEW_IS_PRIMARY
# undef Q_L1S_VIEW_IS_PRIMARY
diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h
index e12516966a..148501288c 100644
--- a/src/corelib/text/qstringconverter.h
+++ b/src/corelib/text/qstringconverter.h
@@ -211,6 +211,66 @@ QByteArray &operator+=(QByteArray &a, const QStringEncoder::DecodedData<T> &b)
}
#endif
+template <typename InputIterator>
+void QString::assign_helper_char8(InputIterator first, InputIterator last)
+{ // Replaces this string's contents with the UTF-16 decoding of the UTF-8 code units in [first, last).
+ static_assert(!QString::is_contiguous_iterator_v<InputIterator>,
+ "Internal error: Should have been handed over to the QAnyStringView overload."
+ );
+ // Only non-contiguous iterators get here; their elements are fed to the decoder one at a time.
+ using ValueType = typename std::iterator_traits<InputIterator>::value_type;
+ constexpr bool IsFwdIt = std::is_convertible_v<
+ typename std::iterator_traits<InputIterator>::iterator_category,
+ std::forward_iterator_tag
+ >; // true for forward (or stronger) iterators, whose range length can be measured up front
+ // Drop the current contents first; the decoded text is then written from the start of the data.
+ resize(0);
+ // In case of not being shared, there is the possibility of having free space at begin
+ // even after the resize to zero. Move the begin pointer back by that amount so it is written to as well.
+ if (const auto offset = d.freeSpaceAtBegin())
+ d.setBegin(d.begin() - offset);
+ // The range length is known here, so reserve one UTF-16 code unit per input element up front.
+ if constexpr (IsFwdIt)
+ reserve(static_cast<qsizetype>(std::distance(first, last)));
+ // One decoder instance serves the whole range: a multi-byte sequence arrives spread over several chunks.
+ auto toUtf16 = QStringDecoder(QStringDecoder::Utf8);
+ auto availableCapacity = d.constAllocatedCapacity();
+ auto *dst = d.data(); // write cursor
+ auto *dend = d.data() + availableCapacity; // end of the allocated capacity
+
+ while (true) {
+ if (first == last) { // ran out of input elements
+ Q_ASSERT(!std::less<>{}(dend, dst)); // the write cursor must not have passed the end of the allocation
+ d.size = dst - d.begin(); // record how many code units were written; no terminator is written here
+ return;
+ }
+ const ValueType next = *first; // decays proxies, if any
+ const auto chunk = QUtf8StringView(&next, 1); // view over just this one element
+ // UTF-8 characters can have a maximum size of 4 bytes and may result in a surrogate
+ // pair of UTF-16 code units. In the input-iterator case, we don't know the size
+ // and would need to always reserve space for 2 code units. To keep our promise
+ // of 'not allocating if it fits', we have to pre-check this condition.
+ // We know that it fits in the forward-iterator case.
+ if constexpr (!IsFwdIt) {
+ constexpr qsizetype Pair = 2;
+ char16_t buf[Pair]; // scratch output; NOTE(review): assumes one chunk never yields more than Pair code units - confirm against QStringDecoder
+ const qptrdiff n = toUtf16.appendToBuffer(buf, chunk) - buf; // number of code units this chunk produced (may be 0)
+ if (dend - dst < n) { // ran out of allocated memory
+ const auto offset = dst - d.begin(); // keep the write position as an index across the reallocation
+ reallocData(d.constAllocatedCapacity() + Pair, QArrayData::Grow);
+ // update the pointers since we've re-allocated
+ availableCapacity = d.constAllocatedCapacity();
+ dst = d.data() + offset;
+ dend = d.data() + availableCapacity;
+ }
+ dst = std::copy_n(buf, n, dst); // move the decoded code units from the scratch buffer into the string
+ } else { // take the fast path
+ dst = toUtf16.appendToBuffer(dst, chunk); // decode straight into the reserved storage
+ }
+ ++first;
+ }
+}
+
QT_END_NAMESPACE
#endif
diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp
index 2d03fb9d7c..77fb85d80f 100644
--- a/tests/auto/corelib/text/qstring/tst_qstring.cpp
+++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp
@@ -3467,6 +3467,37 @@ void tst_QString::assign()
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
+#ifndef QT_NO_CAST_FROM_ASCII
+ const char c8[] = "a©☻🂤"; // [1, 2, 3, 4] bytes in utf-8 code points
+ str.assign(std::begin(c8), std::end(c8) - 1);
+ QCOMPARE(str, c8);
+
+ std::string c8str(c8);
+ str.assign(c8str.begin(), c8str.end());
+ QCOMPARE(str, c8);
+ QCOMPARE(str.capacity(), qsizetype(std::size(c8) - 1));
+
+ oldCap = str.capacity();
+ str.assign(c8str.begin(), c8str.begin()); // empty range
+ QCOMPARE_EQ(str.capacity(), oldCap);
+ QCOMPARE_EQ(str.size(), 0);
+
+ std::forward_list<char> fwd(std::begin(c8), std::end(c8) - 1);
+ str.assign(fwd.begin(), fwd.end());
+ QCOMPARE(str, c8);
+#endif
+#ifdef __cpp_char8_t
+ const char8_t c8t[] = u8"🂤🂤🂤🂤🂤🂤🂤🂤🂤🂤"; // 10 x 4 bytes in utf-8 code points
+ str.assign(std::begin(c8t), std::end(c8t) - 1);
+ QCOMPARE(str, c8t);
+ QCOMPARE(str.size(), 20);
+#endif
+#ifdef __cpp_lib_char8_t
+ std::u8string c8tstr(c8t);
+ str.assign(c8tstr.begin(), c8tstr.end());
+ QCOMPARE(str, c8t);
+#endif
+
const char16_t c16[] = u"٩(⁎❛ᴗ❛⁎)۶ 🤷";
str.assign(std::begin(c16), std::end(c16) - 1);
QCOMPARE(str, c16);
@@ -3516,6 +3547,51 @@ void tst_QString::assign()
str.assign(std::istream_iterator<ushort>{}, std::istream_iterator<ushort>{}); // empty range
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
+
+#ifndef QT_NO_CAST_FROM_ASCII
+ str.resize(0);
+ str.squeeze();
+ str.reserve(5);
+ const char c8cmp[] = "🂤🂤a"; // 2 + 2 + 1 byte
+ ss.clear();
+ ss.str(c8cmp);
+ str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
+ QCOMPARE(str, c8cmp);
+ QCOMPARE(str.size(), 5);
+ QCOMPARE(str.capacity(), 5);
+
+ // 1 code-point + ill-formed sequence + 1 code-point.
+ const char c8IllFormed[] = "a\xe0\x9f\x80""a";
+ ss.clear();
+ ss.str(c8IllFormed);
+ str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
+ QEXPECT_FAIL("", "Iconsistent handling of ill-formed sequences, QTBUG-117051", Continue);
+ QCOMPARE_EQ(str, QString(c8IllFormed));
+
+ const char c82[] = "ÌşṫһíᶊśꞧɨℼṩuDF49ïľι?";
+ ss.clear();
+ ss.str(c82);
+ str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
+ QCOMPARE(str, c82);
+
+ const char uc8[] = "ẵƽ𝔰ȉ𝚐ꞑ𝒾𝝿𝕘";
+ ss.clear();
+ ss.str(uc8);
+ str.assign(std::istream_iterator<uchar>{ss}, std::istream_iterator<uchar>{});
+ QCOMPARE(str, uc8);
+
+ ss.clear();
+ const char sc8[] = "𓁇ख़ॵ௵";
+ ss.str(sc8);
+ str.assign(std::istream_iterator<signed char>{ss}, std::istream_iterator<signed char>{});
+ QCOMPARE(str, sc8);
+
+ oldCap = str.capacity();
+ str.assign(std::istream_iterator<signed char>{}, // empty range
+ std::istream_iterator<signed char>{});
+ QCOMPARE_EQ(str.capacity(), oldCap);
+ QCOMPARE_EQ(str.size(), 0);
+#endif
}
// Test chaining
{
@@ -3634,7 +3710,7 @@ void tst_QString::assign_uses_prepend_buffer()
for (qsizetype i = 0; i < withFreeSpaceAtBegin.d.freeSpaceAtBegin(); ++i)
ss << "d ";
- withFreeSpaceAtBegin.assign(std::istream_iterator<ushort>{ss}, std::istream_iterator<ushort>{});
+ withFreeSpaceAtBegin.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
QCOMPARE_EQ(withFreeSpaceAtBegin.d.freeSpaceAtBegin(), 0); // we used the prepend buffer
QCOMPARE_EQ(capBegin(withFreeSpaceAtBegin), oldCapBegin);
QCOMPARE_EQ(capEnd(withFreeSpaceAtBegin), oldCapEnd);