summary refs log tree commit diff stats
path: root/src/corelib/text/qstringconverter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r-- src/corelib/text/qstringconverter.cpp 779
1 files changed, 550 insertions, 229 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 4c94de029e..565e3e598b 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -10,6 +10,8 @@
#include "private/qstringiterator_p.h"
#include "private/qtools_p.h"
#include "qbytearraymatcher.h"
+#include "qcontainertools_impl.h"
+#include <QtCore/qbytearraylist.h>
#if QT_CONFIG(icu)
#include <unicode/ucnv.h>
@@ -22,15 +24,21 @@
#include <qt_windows.h>
#ifndef QT_BOOTSTRAPPED
#include <QtCore/qvarlengtharray.h>
+#include <QtCore/q20iterator.h>
+#include <QtCore/private/qnumeric_p.h>
#endif // !QT_BOOTSTRAPPED
#endif
+#include <array>
+
#if __has_include(<bit>) && __cplusplus > 201703L
#include <bit>
#endif
QT_BEGIN_NAMESPACE
+using namespace QtMiscUtils;
+
static_assert(std::is_nothrow_move_constructible_v<QStringEncoder>);
static_assert(std::is_nothrow_move_assignable_v<QStringEncoder>);
static_assert(std::is_nothrow_move_constructible_v<QStringDecoder>);
@@ -40,8 +48,7 @@ enum { Endian = 0, Data = 1 };
static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
-#if (defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)) \
- || defined(__ARM_NEON__)
+#if defined(__SSE2__) || defined(__ARM_NEON__)
static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
{
#if defined(__cpp_lib_int_pow2) && __cpp_lib_int_pow2 >= 202002L
@@ -57,7 +64,7 @@ static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
}
#endif
-#if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+#if defined(__SSE2__)
static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
{
// do sixteen characters at a time
@@ -194,14 +201,14 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
#ifdef __AVX2__
// do 32 characters at a time
// (this is similar to simdTestMask in qstring.cpp)
- const __m256i mask = _mm256_set1_epi8(0x80);
+ const __m256i mask = _mm256_set1_epi8(char(0x80));
for ( ; end - src >= 32; src += 32) {
__m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
if (_mm256_testz_si256(mask, data))
continue;
uint n = _mm256_movemask_epi8(data);
- Q_ASSUME(n);
+ Q_ASSERT(n);
// find the next probable ASCII character
// we don't want to load 32 bytes again in this loop if we know there are non-ASCII
@@ -251,7 +258,7 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
// Compare only the US-ASCII beginning of [src8, end8) and [src16, end16)
// and advance src8 and src16 to the first character that could not be compared
-static void simdCompareAscii(const char8_t *&src8, const char8_t *end8, const char16_t *&src16, const char16_t *end16)
+static void simdCompareAscii(const qchar8_t *&src8, const qchar8_t *end8, const char16_t *&src16, const char16_t *end16)
{
int bitSpacing = 1;
qptrdiff len = qMin(end8 - src8, end16 - src16);
@@ -437,7 +444,7 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
return src;
}
-static void simdCompareAscii(const char8_t *&, const char8_t *, const char16_t *&, const char16_t *)
// Empty variant of simdCompareAscii() for this preprocessor branch: the body
// does nothing, so the two reference arguments are left untouched.
+static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
{
}
#else
@@ -457,7 +464,7 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
return src;
}
-static void simdCompareAscii(const char8_t *&, const char8_t *, const char16_t *&, const char16_t *)
// Empty variant of simdCompareAscii() for the #else branch: the body does
// nothing, so the two reference arguments are left untouched.
+static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
{
}
#endif
@@ -504,8 +511,7 @@ QByteArray QUtf8::convertFromUnicode(QStringView in, QStringConverterBase::State
char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state)
{
Q_ASSERT(state);
- const QChar *uc = in.data();
- qsizetype len = in.length();
+ qsizetype len = in.size();
if (!len)
return out;
@@ -522,7 +528,7 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
};
uchar *cursor = reinterpret_cast<uchar *>(out);
- const char16_t *src = reinterpret_cast<const char16_t *>(uc);
+ const char16_t *src = in.utf16();
const char16_t *const end = src + len;
if (!(state->flags & QStringDecoder::Flag::Stateless)) {
@@ -572,6 +578,21 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
return reinterpret_cast<char *>(cursor);
}
+/*! \internal
+    Encodes the Latin-1 characters viewed by \a in as UTF-8, writing the
+    bytes to \a out.
+
+    Characters below 0x80 produce one byte, all others produce two. No bounds
+    checking is done on \a out.
+
+    Returns a pointer to one past the last byte written.
+*/
+char *QUtf8::convertFromLatin1(char *out, QLatin1StringView in)
+{
+    // ### SIMD-optimize:
+    for (const uchar latin1 : in) {
+        if (latin1 >= 0x80) {
+            // two-byte form, as per https://en.wikipedia.org/wiki/UTF-8#Encoding, 2nd row:
+            // the lead byte carries the top two bits, the trail byte the low six
+            *out++ = 0b1100'0000u | (latin1 >> 6);
+            *out++ = 0b1000'0000u | (latin1 & 0b0011'1111);
+            continue;
+        }
+        // US-ASCII is copied through unchanged
+        *out++ = latin1;
+    }
+    return out;
+}
+
QString QUtf8::convertToUnicode(QByteArrayView in)
{
// UTF-8 to UTF-16 always needs the exact same number of words or less:
@@ -593,14 +614,14 @@ QString QUtf8::convertToUnicode(QByteArrayView in)
return result;
}
-/*!
- \since 5.7
+/*! \internal
+ \since 6.6
\overload
Converts the UTF-8 sequence of bytes viewed by \a in to a sequence of
- QChar starting at \a buffer. The buffer is expected to be large enough
- to hold the result. An upper bound for the size of the buffer is
- \c in.size() QChars.
+ QChar starting at \a dst in the destination buffer. The buffer is expected
+ to be large enough to hold the result. An upper bound for the size of the
+ buffer is \c in.size() QChars.
If, during decoding, an error occurs, a QChar::ReplacementCharacter is
written.
@@ -608,11 +629,12 @@ QString QUtf8::convertToUnicode(QByteArrayView in)
Returns a pointer to one past the last QChar written.
This function never throws.
-*/
-QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
+ For QChar buffers, instead of casting manually, you can use the static
+ QUtf8::convertToUnicode(QChar *, QByteArrayView) directly.
+*/
+char16_t *QUtf8::convertToUnicode(char16_t *dst, QByteArrayView in) noexcept
{
- char16_t *dst = reinterpret_cast<char16_t *>(buffer);
const uchar *const start = reinterpret_cast<const uchar *>(in.data());
const uchar *src = start;
const uchar *end = src + in.size();
@@ -635,7 +657,7 @@ QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
do {
uchar b = *src++;
- int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
+ const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
if (res < 0) {
// decoding error
*dst++ = QChar::ReplacementCharacter;
@@ -644,7 +666,7 @@ QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
}
}
- return reinterpret_cast<QChar *>(dst);
+ return dst;
}
QString QUtf8::convertToUnicode(QByteArrayView in, QStringConverter::State *state)
@@ -665,23 +687,22 @@ QString QUtf8::convertToUnicode(QByteArrayView in, QStringConverter::State *stat
return result;
}
-QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state)
+char16_t *QUtf8::convertToUnicode(char16_t *dst, QByteArrayView in, QStringConverter::State *state)
{
qsizetype len = in.size();
Q_ASSERT(state);
if (!len)
- return out;
+ return dst;
char16_t replacement = QChar::ReplacementCharacter;
if (state->flags & QStringConverter::Flag::ConvertInvalidToNull)
replacement = QChar::Null;
- int res;
+ qsizetype res;
uchar ch = 0;
- char16_t *dst = reinterpret_cast<char16_t *>(out);
const uchar *src = reinterpret_cast<const uchar *>(in.data());
const uchar *end = src + len;
@@ -709,7 +730,7 @@ QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::
// copy to our state and return
state->remainingChars = remainingCharsCount + newCharsToCopy;
memcpy(&state->state_data[0], remainingCharsData, state->remainingChars);
- return out;
+ return dst;
} else if (!headerdone) {
// eat the UTF-8 BOM
if (dst[-1] == 0xfeff)
@@ -765,7 +786,7 @@ QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::
state->remainingChars = 0;
}
- return reinterpret_cast<QChar *>(dst);
+ return dst;
}
struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii
@@ -795,7 +816,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in)
isValidAscii = false;
QUtf8NoOutputTraits::NoOutput output;
- int res = QUtf8Functions::fromUtf8<QUtf8NoOutputTraits>(b, output, src, end);
+ const qsizetype res = QUtf8Functions::fromUtf8<QUtf8NoOutputTraits>(b, output, src, end);
if (res < 0) {
// decoding error
return { false, false };
@@ -806,9 +827,9 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in)
return { true, isValidAscii };
}
-int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
+int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs) noexcept
{
- auto src1 = reinterpret_cast<const char8_t *>(utf8.data());
+ auto src1 = reinterpret_cast<const qchar8_t *>(utf8.data());
auto end1 = src1 + utf8.size();
auto src2 = reinterpret_cast<const char16_t *>(utf16.data());
auto end2 = src2 + utf16.size();
@@ -822,7 +843,7 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
if (uc1 >= 0x80) {
char32_t *output = &uc1;
- int res = QUtf8Functions::fromUtf8<QUtf8BaseTraitsNoAscii>(uc1, output, src1, end1);
+ qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraitsNoAscii>(uc1, output, src1, end1);
if (res < 0) {
// decoding error
uc1 = QChar::ReplacementCharacter;
@@ -833,7 +854,10 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
if (QChar::isHighSurrogate(uc2) && src2 < end2 && QChar::isLowSurrogate(*src2))
uc2 = QChar::surrogateToUcs4(uc2, *src2++);
}
-
+ if (cs == Qt::CaseInsensitive) {
+ uc1 = QChar::toCaseFolded(uc1);
+ uc2 = QChar::toCaseFolded(uc2);
+ }
if (uc1 != uc2)
return int(uc1) - int(uc2);
}
@@ -843,7 +867,7 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
return (end1 > src1) - int(end2 > src2);
}
-int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s)
+int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s, Qt::CaseSensitivity cs)
{
char32_t uc1 = QChar::Null;
auto src1 = reinterpret_cast<const uchar *>(utf8.data());
@@ -854,13 +878,17 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s)
while (src1 < end1 && src2 < end2) {
uchar b = *src1++;
char32_t *output = &uc1;
- int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
+ const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
if (res < 0) {
// decoding error
uc1 = QChar::ReplacementCharacter;
}
char32_t uc2 = *src2++;
+ if (cs == Qt::CaseInsensitive) {
+ uc1 = QChar::toCaseFolded(uc1);
+ uc2 = QChar::toCaseFolded(uc2);
+ }
if (uc1 != uc2)
return int(uc1) - int(uc2);
}
@@ -869,6 +897,52 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s)
return (end1 > src1) - (end2 > src2);
}
+/*! \internal
+    Compares the UTF-8 data viewed by \a lhs and \a rhs.
+
+    With Qt::CaseSensitive the raw bytes are compared with memcmp(). Otherwise
+    both sides are decoded code point by code point, undecodable input is
+    treated as QChar::ReplacementCharacter, and the case-folded code points
+    are compared.
+
+    Returns a negative value, zero or a positive value if \a lhs sorts
+    before, equal to or after \a rhs. If one side is a prefix of the other,
+    the shorter one sorts first.
+*/
+int QUtf8::compareUtf8(QByteArrayView lhs, QByteArrayView rhs, Qt::CaseSensitivity cs) noexcept
+{
+    if (lhs.isEmpty())
+        return qt_lencmp(0, rhs.size());
+
+    // An empty rhs may be a null view; passing its data() to memcmp() below
+    // would be undefined behavior even with a length of zero.
+    if (rhs.isEmpty())
+        return qt_lencmp(lhs.size(), 0);
+
+    if (cs == Qt::CaseSensitive) {
+        const auto l = std::min(lhs.size(), rhs.size());
+        int r = memcmp(lhs.data(), rhs.data(), l);
+        return r ? r : qt_lencmp(lhs.size(), rhs.size());
+    }
+
+    char32_t uc1 = QChar::Null;
+    auto src1 = reinterpret_cast<const uchar *>(lhs.data());
+    auto end1 = src1 + lhs.size();
+    char32_t uc2 = QChar::Null;
+    auto src2 = reinterpret_cast<const uchar *>(rhs.data());
+    auto end2 = src2 + rhs.size();
+
+    while (src1 < end1 && src2 < end2) {
+        // decode one code point from each side
+        uchar b = *src1++;
+        char32_t *output = &uc1;
+        qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
+        if (res < 0) {
+            // decoding error
+            uc1 = QChar::ReplacementCharacter;
+        }
+
+        b = *src2++;
+        output = &uc2;
+        res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src2, end2);
+        if (res < 0) {
+            // decoding error
+            uc2 = QChar::ReplacementCharacter;
+        }
+
+        uc1 = QChar::toCaseFolded(uc1);
+        uc2 = QChar::toCaseFolded(uc2);
+        if (uc1 != uc2)
+            return int(uc1) - int(uc2);
+    }
+
+    // the shorter string sorts first
+    return (end1 > src1) - (end2 > src2);
+}
+
+#ifndef QT_BOOTSTRAPPED
QByteArray QUtf16::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian)
{
bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
@@ -878,7 +952,7 @@ QByteArray QUtf16::convertFromUnicode(QStringView in, QStringConverter::State *s
QByteArray d(length, Qt::Uninitialized);
char *end = convertFromUnicode(d.data(), in, state, endian);
- Q_ASSERT(end - d.constData() == d.length());
+ Q_ASSERT(end - d.constData() == d.size());
Q_UNUSED(end);
return d;
}
@@ -901,13 +975,13 @@ char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::St
out += 2;
}
if (endian == BigEndianness)
- qToBigEndian<char16_t>(in.data(), in.length(), out);
+ qToBigEndian<char16_t>(in.data(), in.size(), out);
else
- qToLittleEndian<char16_t>(in.data(), in.length(), out);
+ qToLittleEndian<char16_t>(in.data(), in.size(), out);
state->remainingChars = 0;
state->internalState |= HeaderDone;
- return out + 2*in.length();
+ return out + 2*in.size();
}
QString QUtf16::convertToUnicode(QByteArrayView in, QStringConverter::State *state, DataEndianness endian)
@@ -1037,7 +1111,7 @@ char *QUtf32::convertFromUnicode(char *out, QStringView in, QStringConverter::St
}
const QChar *uc = in.data();
- const QChar *end = in.data() + in.length();
+ const QChar *end = in.data() + in.size();
QChar ch;
char32_t ucs4;
if (state->remainingChars == 1) {
@@ -1178,6 +1252,7 @@ QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter:
return out;
}
+#endif // !QT_BOOTSTRAPPED
#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
int QLocal8Bit::checkUtf8()
@@ -1185,186 +1260,365 @@ int QLocal8Bit::checkUtf8()
return GetACP() == CP_UTF8 ? 1 : -1;
}
-static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state)
// Convenience overload: forwards to the overload taking an explicit code
// page, passing CP_ACP.
+QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
{
- qsizetype length = in.size();
- const char *chars = in.data();
-
- Q_ASSERT(state);
- if (state->flags & QStringConverter::Flag::Stateless) // temporary
- state = nullptr;
-
- if (!chars || !length)
- return QString();
-
- qsizetype copyLocation = 0;
- qsizetype extra = 2;
- if (state && state->remainingChars) {
- copyLocation = state->remainingChars;
- extra += copyLocation;
- }
- qsizetype newLength = length + extra;
- char *mbcs = new char[newLength];
- //ensure that we have a NULL terminated string
- mbcs[newLength-1] = 0;
- mbcs[newLength-2] = 0;
- memcpy(&(mbcs[copyLocation]), chars, length);
- if (copyLocation) {
- //copy the last character from the state
- mbcs[0] = (char)state->state_data[0];
- state->remainingChars = 0;
- }
- const char *mb = mbcs;
- const char *next = 0;
- QString s;
- while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
- wchar_t wc[2] ={0};
- int charlength = next - mb;
- int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
- if (len>0) {
- s.append(QChar(wc[0]));
- } else {
- int r = GetLastError();
- //check if the character being dropped is the last character
- if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
- state->remainingChars = 1;
- state->state_data[0] = (char)*mb;
- }
- }
- mb = next;
- }
- delete [] mbcs;
- return s;
+ return convertToUnicode_sys(in, CP_ACP, state);
}
-
-QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
// Decodes the bytes viewed by `in` from the Windows code page `codePage` into
// a QString using MultiByteToWideChar(). `state` must not be null.
// Unless the Stateless flag is set, trailing bytes that cannot be decoded yet
// are stored in state->state_data (when they fit) so that a later call can
// retry them together with new input. Bytes that cannot be decoded at all are
// replaced by QChar::ReplacementCharacter (QChar::Null with
// ConvertInvalidToNull) and counted in state->invalidChars.
// Returns a null QString if `in` has no data or is empty.
+QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage,
+ QStringConverter::State *state)
{
- qsizetype length = in.size();
-
- Q_ASSERT(length < INT_MAX); // ### FIXME
const char *mb = in.data();
- int mblen = length;
+ qsizetype mblen = in.size();
+
+ Q_ASSERT(state);
+ qsizetype &invalidChars = state->invalidChars; // bound now: `state` may be set to nullptr just below
+ using Flag = QStringConverter::Flag;
+ const bool useNullForReplacement = !!(state->flags & Flag::ConvertInvalidToNull);
+ const char16_t replacementCharacter = useNullForReplacement ? QChar::Null
+ : QChar::ReplacementCharacter;
+ if (state->flags & Flag::Stateless) {
+ Q_ASSERT(state->remainingChars == 0);
+ state = nullptr;
+ }
if (!mb || !mblen)
return QString();
- QVarLengthArray<wchar_t, 4096> wc(4096);
- int len;
+ // Use a local stack-buffer at first to allow us a decently large container
+ // to avoid a lot of resizing, without also returning an overallocated
+ // QString to the user for small strings.
+ // Then we can be fast for small strings and take the hit of extra resizes
+ // and measuring how much storage is needed for large strings.
+ std::array<wchar_t, 4096> buf;
+ wchar_t *out = buf.data();
+ qsizetype outlen = buf.size();
+
QString sp;
- bool prepend = false;
- char state_data = 0;
- int remainingChars = 0;
-
- //save the current state information
- if (state) {
- state_data = (char)state->state_data[0];
- remainingChars = state->remainingChars;
- }
- //convert the pending character (if available)
- if (state && remainingChars) {
- char prev[3] = {0};
- prev[0] = state_data;
- prev[1] = mb[0];
- remainingChars = 0;
- len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
- prev, 2, wc.data(), wc.length());
- if (len) {
- sp.append(QChar(wc[0]));
- if (mblen == 1) {
- state->remainingChars = 0;
- return sp;
- }
- prepend = true;
- mb++;
- mblen--;
- wc[0] = 0;
+ // Return a pointer to storage where we have enough space for `size`
+ const auto growOut = [&](qsizetype size) -> std::tuple<wchar_t *, qsizetype> {
+ if (outlen >= size)
+ return {out, outlen};
+ const bool wasStackBuffer = sp.isEmpty();
+ const auto begin = wasStackBuffer ? buf.data() : reinterpret_cast<wchar_t *>(sp.data());
+ const qsizetype offset = qsizetype(std::distance(begin, out));
+ qsizetype newSize = 0;
+ if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
+ Q_CHECK_PTR(false);
+ return {nullptr, 0};
}
+ sp.resize(newSize);
+ auto it = reinterpret_cast<wchar_t *>(sp.data());
+ if (wasStackBuffer)
+ it = std::copy_n(buf.data(), offset, it);
+ else
+ it += offset;
+ return {it, size}; // sp now holds offset + size elements, so exactly `size` are free from `it` on
+ };
+
+ // Convert the pending characters (if available)
+ while (state && state->remainingChars && mblen) {
+ QStringConverter::State localState;
+ localState.flags = state->flags;
+ // Use at most 6 characters as a guess for the longest encoded character
+ // in any multibyte encoding.
+ // Even with a total of 2 bytes of overhead that would leave around
+ // 2^(4 * 8) possible characters
+ std::array<char, 6> prev = {0};
+ Q_ASSERT(state->remainingChars <= q20::ssize(state->state_data));
+ qsizetype index = 0;
+ for (; index < state->remainingChars; ++index)
+ prev[index] = state->state_data[index];
+ const qsizetype toCopy = std::min(q20::ssize(prev) - index, mblen);
+ for (qsizetype i = 0; i < toCopy; ++i, ++index)
+ prev[index] = mb[i];
+ mb += toCopy;
+ mblen -= toCopy;
+
+ // Recursing:
+ // Since we are using a clean local state it will try to decode what was
+ // stored in our state + some extra octets from input (`prev`). If some
+ // part fails we will have those characters stored in the local state's
+ // storage, and we can extract those. It may also output some
+ // replacement characters, which we'll count in the invalidChars.
+ // In the best case we only do this once, but we will loop until we have
+ // resolved all the remaining characters or we have run out of new input
+ // in which case we may still have remaining characters.
+ const QString tmp = convertToUnicode_sys(QByteArrayView(prev.data(), index), codePage,
+ &localState);
+ std::tie(out, outlen) = growOut(tmp.size());
+ if (!out)
+ return {};
+ out = std::copy_n(reinterpret_cast<const wchar_t *>(tmp.constData()), tmp.size(), out);
+ outlen -= tmp.size();
+ const qsizetype tail = toCopy - localState.remainingChars;
+ if (tail >= 0) {
+ // Everything left to process comes from `in`, so we can stop
+ // looping. Adjust the window for `in` and unset remainingChars to
+ // signal that we're done.
+ mb -= localState.remainingChars;
+ mblen += localState.remainingChars;
+ localState.remainingChars = 0;
+ }
+ state->remainingChars = localState.remainingChars;
+ state->invalidChars += localState.invalidChars;
+ std::copy_n(localState.state_data, state->remainingChars, state->state_data);
}
- while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
- mb, mblen, wc.data(), wc.length()))) {
- int r = GetLastError();
- if (r == ERROR_INSUFFICIENT_BUFFER) {
- const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
- mb, mblen, 0, 0);
- wc.resize(wclen);
- } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
- //find the last non NULL character
- while (mblen > 1 && !(mb[mblen-1]))
- mblen--;
- //check whether, we hit an invalid character in the middle
- if ((mblen <= 1) || (remainingChars && state_data))
- return convertToUnicodeCharByChar(in, state);
- //Remove the last character and try again...
- state_data = mb[mblen-1];
- remainingChars = 1;
- mblen--;
+ Q_ASSERT(!state || state->remainingChars == 0 || mblen == 0);
+
+ // Need it in this scope, since we try to decrease our window size if we
+ // encounter an error
+ int nextIn = qt_saturate<int>(mblen);
+ while (mblen > 0) {
+ std::tie(out, outlen) = growOut(1); // Need space for at least one character
+ if (!out)
+ return {};
+ const int nextOut = qt_saturate<int>(outlen);
+ int len = MultiByteToWideChar(codePage, MB_ERR_INVALID_CHARS, mb, nextIn, out, nextOut);
+ if (len) {
+ mb += nextIn;
+ mblen -= nextIn;
+ out += len;
+ outlen -= len;
} else {
- // Fail.
- qWarning("MultiByteToWideChar: Cannot convert multibyte text");
- break;
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ const int wclen = MultiByteToWideChar(codePage, 0, mb, nextIn, 0, 0);
+ std::tie(out, outlen) = growOut(wclen);
+ if (!out)
+ return {};
+ } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+ // Can't decode the current window, so either store the state,
+ // reduce window size or output a replacement character.
+
+ // Check if we can store all remaining characters in the state
+ // to be used next time we're called:
+ if (state && mblen <= q20::ssize(state->state_data)) {
+ state->remainingChars = mblen;
+ std::copy_n(mb, mblen, state->state_data);
+ mb += mblen;
+ mblen = 0;
+ break;
+ }
+
+ // .. if not, try to find the last valid character in the window
+ // and try again with a shrunken window:
+ if (nextIn > 1) {
+ // There may be some incomplete data at the end of our current
+ // window, so decrease the window size and try again.
+ // In the worst case scenario there are gigs of undecodable
+ // garbage, but what are we supposed to do about that?
+ const auto it = CharPrevExA(codePage, mb, mb + nextIn, 0);
+ if (it != mb)
+ nextIn = int(it - mb);
+ else
+ --nextIn;
+ continue; // retry with the smaller window; this skips the nextIn reset at the end of the loop
+ }
+
+ // Finally, we are forced to output a replacement character for
+ // the first byte in the window:
+ std::tie(out, outlen) = growOut(1);
+ if (!out)
+ return {};
+ *out = replacementCharacter;
+ ++invalidChars;
+ ++out;
+ --outlen;
+ ++mb;
+ --mblen;
+ } else {
+ // Fail.
+ qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+ break;
+ }
}
+ nextIn = qt_saturate<int>(mblen);
}
- if (len <= 0)
- return QString();
+ if (sp.isEmpty()) {
+ // We must have only used the stack buffer
+ if (out != buf.data()) // else: we return null-string
+ sp = QStringView(buf.data(), out).toString();
+ } else{
+ const auto begin = reinterpret_cast<wchar_t *>(sp.data());
+ sp.truncate(std::distance(begin, out));
+ }
- if (wc[len-1] == 0) // len - 1: we don't want terminator
- --len;
+ if (sp.size() && sp.back().isNull())
+ sp.chop(1);
- //save the new state information
- if (state) {
- state->state_data[0] = (char)state_data;
- state->remainingChars = remainingChars;
- }
- QString s((QChar*)wc.data(), len);
- if (prepend) {
- return sp+s;
+ if (!state && mblen > 0) {
+ // We have trailing character(s) that could not be converted, and
+ // nowhere to cache them
+ sp.resize(sp.size() + mblen, replacementCharacter);
+ invalidChars += mblen;
}
- return s;
+ return sp;
}
// Convenience overload: forwards to the overload taking an explicit code
// page, passing CP_ACP.
QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state)
{
- const QChar *ch = in.data();
+ return convertFromUnicode_sys(in, CP_ACP, state);
+}
+
+/*! \internal
+ Encodes the UTF-16 text viewed by \a in to the Windows code page
+ \a codePage using WideCharToMultiByte() and returns the resulting bytes.
+
+ \a state must not be null. Unless the Stateless flag is set, a high
+ surrogate at the very end of \a in is held back in \a state and combined
+ with the first character of the next call. The number of characters that
+ could not be represented is not tracked (see the comment in the body).
+
+ Returns a null QByteArray if \a in has no data, and an empty non-null one
+ if it has data but a size of zero.
+*/
+QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
+ QStringConverter::State *state)
+{
+ const wchar_t *ch = reinterpret_cast<const wchar_t *>(in.data());
qsizetype uclen = in.size();
- Q_ASSERT(uclen < INT_MAX); // ### FIXME
Q_ASSERT(state);
- Q_UNUSED(state); // ### Fixme
- if (state->flags & QStringConverter::Flag::Stateless) // temporary
+ // The Windows API has a *boolean* out-parameter that says if a replacement
+ // character was used, but it gives us no way to know _how many_ were used.
+ // Since we cannot simply scan the string for replacement characters
+ // (which is potentially a question mark, and thus a valid character),
+ // we simply do not track the number of invalid characters here.
+ // auto &invalidChars = state->invalidChars;
+
+ using Flag = QStringConverter::Flag;
+ if (state->flags & Flag::Stateless) { // temporary
+ Q_ASSERT(state->remainingChars == 0);
state = nullptr;
+ }
if (!ch)
return QByteArray();
if (uclen == 0)
return QByteArray("");
- BOOL used_def;
- QByteArray mb(4096, 0);
- int len;
- while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
- mb.data(), mb.size()-1, 0, &used_def)))
- {
- int r = GetLastError();
- if (r == ERROR_INSUFFICIENT_BUFFER) {
- mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
- (const wchar_t*)ch, uclen,
- 0, 0, 0, &used_def));
- // and try again...
+
+ // Use a local stack-buffer at first to allow us a decently large container
+ // to avoid a lot of resizing, without also returning an overallocated
+ // QByteArray to the user for small strings.
+ // Then we can be fast for small strings and take the hit of extra resizes
+ // and measuring how much storage is needed for large strings.
+ std::array<char, 4096> buf;
+ char *out = buf.data();
+ qsizetype outlen = buf.size();
+ QByteArray mb;
+
+ if (state && state->remainingChars > 0) {
+ Q_ASSERT(state->remainingChars == 1);
+ // Let's try to decode the pending character
+ wchar_t wc[2] = { wchar_t(state->state_data[0]), ch[0] };
+ // Check if the second character is a valid low surrogate,
+ // otherwise we'll just decode the first character, for which windows
+ // will output a replacement character.
+ const bool validCodePoint = QChar::isLowSurrogate(wc[1]);
+ int len = WideCharToMultiByte(codePage, 0, wc, validCodePoint ? 2 : 1, out, outlen, nullptr,
+ nullptr);
+ if (!len)
+ return {}; // Cannot recover, and I refuse to believe it was a size limitation
+ out += len;
+ outlen -= len;
+ if (validCodePoint) {
+ ++ch;
+ --uclen;
+ }
+ state->remainingChars = 0;
+ state->state_data[0] = 0;
+ if (uclen == 0)
+ return QByteArrayView(buf.data(), len).toByteArray();
+ }
+
+ if (state && QChar::isHighSurrogate(ch[uclen - 1])) {
+ // We can handle a missing low surrogate at the end of the string,
+ // so if there is one, exclude it now and store it in the state.
+ state->remainingChars = 1;
+ state->state_data[0] = ch[uclen - 1];
+ --uclen;
+ if (uclen == 0) {
+ // Nothing else to convert in this call. If the pending character
+ // handled above already produced output in the stack buffer, return
+ // it instead of dropping it; otherwise return a null array as before.
+ if (out == buf.data())
+ return QByteArray();
+ return QByteArrayView(buf.data(), out).toByteArray();
+ }
+ }
+
+ Q_ASSERT(uclen > 0);
+
+ // Return a pointer to storage where we have enough space for `size`
+ const auto growOut = [&](qsizetype size) -> std::tuple<char *, qsizetype> {
+ if (outlen >= size)
+ return {out, outlen};
+ const bool wasStackBuffer = mb.isEmpty();
+ const auto begin = wasStackBuffer ? buf.data() : mb.data();
+ const qsizetype offset = qsizetype(std::distance(begin, out));
+ qsizetype newSize = 0;
+ if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
+ Q_CHECK_PTR(false);
+ return {nullptr, 0};
+ }
+ mb.resize(newSize);
+ auto it = mb.data();
+ if (wasStackBuffer)
+ it = std::copy_n(buf.data(), offset, it);
+ else
+ it += offset;
+ return {it, size};
+ };
+
+ const auto getNextWindowSize = [&]() {
+ int nextIn = qt_saturate<int>(uclen);
+ // The Windows API has some issues if the current window ends in the
+ // middle of a surrogate pair, so we avoid that:
+ if (nextIn > 1 && QChar::isHighSurrogate(ch[nextIn - 1]))
+ --nextIn;
+ return nextIn;
+ };
+
+ int len = 0;
+ while (uclen > 0) {
+ const int nextIn = getNextWindowSize();
+ std::tie(out, outlen) = growOut(1); // We need at least one byte
+ if (!out)
+ return {};
+ const int nextOut = qt_saturate<int>(outlen);
+ len = WideCharToMultiByte(codePage, 0, ch, nextIn, out, nextOut, nullptr, nullptr);
+ if (len > 0) {
+ ch += nextIn;
+ uclen -= nextIn;
+ out += len;
+ outlen -= len;
} else {
- // Fail. Probably can't happen in fact (dwFlags is 0).
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ int neededLength = WideCharToMultiByte(codePage, 0, ch, nextIn, nullptr, 0,
+ nullptr, nullptr);
+ if (neededLength <= 0) {
+ // Fail. Observed with UTF8 where the input window was max int and ended in an
+ // incomplete sequence, probably a Windows bug. We try to avoid that from
+ // happening by reducing the window size in that case. But let's keep this
+ // branch just in case of other bugs.
#ifndef QT_NO_DEBUG
- // Can't use qWarning(), as it'll recurse to handle %ls
- fprintf(stderr,
- "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
- r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
+ r = GetLastError();
+ fprintf(stderr,
+ "WideCharToMultiByte: Cannot convert multibyte text (error %d)\n", r);
+#endif // !QT_NO_DEBUG
+ break;
+ }
+ std::tie(out, outlen) = growOut(neededLength);
+ if (!out)
+ return {};
+ // and try again...
+ } else {
+ // Fail. Probably can't happen in fact (dwFlags is 0).
+#ifndef QT_NO_DEBUG
+ // Can't use qWarning(), as it'll recurse to handle %ls
+ fprintf(stderr,
+ "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n", r,
+ reinterpret_cast<const wchar_t *>(
+ QStringView(ch, uclen).left(100).toString().utf16()));
#endif
- break;
+ break;
+ }
}
}
- mb.resize(len);
+ if (mb.isEmpty()) {
+ // We must have only used the stack buffer
+ if (out != buf.data()) // else: we return null-array
+ mb = QByteArrayView(buf.data(), out).toByteArray();
+ } else {
+ mb.truncate(std::distance(mb.data(), out));
+ }
return mb;
}
#endif
@@ -1395,6 +1649,7 @@ void QStringConverter::State::reset() noexcept
}
}
+#ifndef QT_BOOTSTRAPPED
static QChar *fromUtf16(QChar *out, QByteArrayView in, QStringConverter::State *state)
{
return QUtf16::convertToUnicode(out, in, state, DetectEndianness);
@@ -1454,20 +1709,9 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state
{
return QUtf32::convertFromUnicode(out, in, state, LittleEndianness);
}
+#endif // !QT_BOOTSTRAPPED
-void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
-
-static QChar *fromLatin1(QChar *out, QByteArrayView in, QStringConverter::State *state)
-{
- Q_ASSERT(state);
- Q_UNUSED(state);
-
- qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
- return out + in.size();
-}
-
-
-static char *toLatin1(char *out, QStringView in, QStringConverter::State *state)
+char *QLatin1::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
{
Q_ASSERT(state);
if (state->flags & QStringConverter::Flag::Stateless) // temporary
@@ -1475,7 +1719,7 @@ static char *toLatin1(char *out, QStringView in, QStringConverter::State *state)
const char replacement = (state && state->flags & QStringConverter::Flag::ConvertInvalidToNull) ? 0 : '?';
qsizetype invalid = 0;
- for (qsizetype i = 0; i < in.length(); ++i) {
+ for (qsizetype i = 0; i < in.size(); ++i) {
if (in[i] > QChar(0xff)) {
*out = replacement;
++invalid;
@@ -1492,26 +1736,28 @@ static char *toLatin1(char *out, QStringView in, QStringConverter::State *state)
static QChar *fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state)
{
QString s = QLocal8Bit::convertToUnicode(in, state);
- memcpy(out, s.constData(), s.length()*sizeof(QChar));
- return out + s.length();
+ memcpy(out, s.constData(), s.size()*sizeof(QChar));
+ return out + s.size();
}
static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
{
QByteArray s = QLocal8Bit::convertFromUnicode(in, state);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ memcpy(out, s.constData(), s.size());
+ return out + s.size();
}
static qsizetype fromUtf8Len(qsizetype l) { return l + 1; }
static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
+#ifndef QT_BOOTSTRAPPED
static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); }
static qsizetype fromUtf32Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); }
+#endif
static qsizetype fromLatin1Len(qsizetype l) { return l + 1; }
static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
@@ -1544,7 +1790,7 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
operation, encoding UTF-16 encoded data (usually in the form of a QString) to
the requested encoding.
- The supported encodings are:
+ The following encodings are always supported:
\list
\li UTF-8
@@ -1558,6 +1804,10 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
\li The system encoding
\endlist
+ QStringConverter may support more encodings depending on how Qt was
+ compiled. If more codecs are supported, they can be listed using
+ availableCodecs().
+
\l {QStringConverter}s can be used as follows to convert some encoded
string to and from UTF-16.
@@ -1647,34 +1897,31 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringConverter::LastEncoding + 1] =
{
{ "UTF-8", QUtf8::convertToUnicode, fromUtf8Len, QUtf8::convertFromUnicode, toUtf8Len },
+#ifndef QT_BOOTSTRAPPED
{ "UTF-16", fromUtf16, fromUtf16Len, toUtf16, toUtf16Len },
{ "UTF-16LE", fromUtf16LE, fromUtf16Len, toUtf16LE, toUtf16Len },
{ "UTF-16BE", fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len },
{ "UTF-32", fromUtf32, fromUtf32Len, toUtf32, toUtf32Len },
{ "UTF-32LE", fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len },
{ "UTF-32BE", fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len },
- { "ISO-8859-1", fromLatin1, fromLatin1Len, toLatin1, toLatin1Len },
+#endif
+ { "ISO-8859-1", QLatin1::convertToUnicode, fromLatin1Len, QLatin1::convertFromUnicode, toLatin1Len },
{ "Locale", fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len }
};
// match names case insensitive and skipping '-' and '_'
static bool nameMatch(const char *a, const char *b)
{
- while (*a && *b) {
- if (*a == '-' || *a == '_') {
+ do {
+ while (*a == '-' || *a == '_') // skip any run of separators in a
++a;
- continue;
- }
- if (*b == '-' || *b == '_') {
+ while (*b == '-' || *b == '_') // skip any run of separators in b
++b;
- continue;
- }
- if (QtMiscUtils::toAsciiLower(*a) != QtMiscUtils::toAsciiLower(*b))
- return false;
- ++a;
- ++b;
- }
- return !*a && !*b;
+ if (!*a && !*b) // end of both strings
+ return true;
+ } while (QtMiscUtils::toAsciiLower(*a++) == QtMiscUtils::toAsciiLower(*b++)); // compare ignoring ASCII case, then advance both
+
+ return false; // characters differ, or only one of the strings has ended
}
@@ -1729,7 +1976,7 @@ struct QStringConverterICU : QStringConverter
const void *context;
ucnv_getToUCallBack(icu_conv, &action, &context);
if (context != state)
- ucnv_setToUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
+ ucnv_setToUCallBack(icu_conv, action, state, nullptr, nullptr, &err);
ucnv_toUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
// We did reserve enough space:
@@ -1751,7 +1998,7 @@ struct QStringConverterICU : QStringConverter
auto source = reinterpret_cast<const UChar *>(in.data());
auto sourceLimit = reinterpret_cast<const UChar *>(in.data() + in.size());
- qsizetype length = UCNV_GET_MAX_BYTES_FOR_STRING(in.length(), ucnv_getMaxCharSize(icu_conv));
+ qsizetype length = UCNV_GET_MAX_BYTES_FOR_STRING(in.size(), ucnv_getMaxCharSize(icu_conv));
char *target = out;
char *targetLimit = out + length;
@@ -1762,7 +2009,7 @@ struct QStringConverterICU : QStringConverter
const void *context;
ucnv_getFromUCallBack(icu_conv, &action, &context);
if (context != state)
- ucnv_setFromUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
+ ucnv_setFromUCallBack(icu_conv, action, state, nullptr, nullptr, &err);
ucnv_fromUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
// We did reserve enough space:
@@ -1977,6 +2224,7 @@ const char *QStringConverter::name() const noexcept
Returns the canonical name of the encoding this QStringConverter can encode or decode.
Returns a nullptr if the converter is not valid.
+ The returned name is UTF-8 encoded.
\sa isValid()
*/
@@ -1988,10 +2236,14 @@ const char *QStringConverter::name() const noexcept
\c{std::nullopt} is returned. Such a name may, none the less, be accepted by
the QStringConverter constructor when Qt is built with ICU, if ICU provides a
converter with the given name.
+
+ \a name is expected to be UTF-8 encoded.
*/
std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(const char *name) noexcept
{
- for (int i = 0; i < LastEncoding + 1; ++i) {
+ if (!name)
+ return std::nullopt;
+ for (qsizetype i = 0; i < LastEncoding + 1; ++i) {
if (nameMatch(encodingInterfaces[i].name, name))
return QStringConverter::Encoding(i);
}
@@ -2000,6 +2252,7 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(cons
return std::nullopt;
}
+#ifndef QT_BOOTSTRAPPED
/*!
Returns the encoding for the content of \a data if it can be determined.
\a expectedFirstCharacter can be passed as an additional hint to help determine
@@ -2059,7 +2312,7 @@ static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
if (pos != -1) {
pos = charsetSearcher.indexIn(header, pos);
if (pos != -1) {
- pos += int(qstrlen("charset="));
+ pos += qstrlen("charset=");
if (pos < header.size() && (header.at(pos) == '\"' || header.at(pos) == '\''))
++pos;
@@ -2108,6 +2361,63 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByt
return Utf8;
}
+static qsizetype availableCodecCount() // number of names availableCodecs() will list
+{
+#if !QT_CONFIG(icu)
+ return QStringConverter::Encoding::LastEncoding + 1; // every encodingInterfaces entry, "Locale" (the last one) included
+#else
+ /* ICU also lists the names of the encodings Qt provides itself,
+ except for the special "Locale" one (so add one for it)
+ */
+ return 1 + ucnv_countAvailable();
+#endif
+}
+
+/*!
+ Returns a list of names of supported codecs. The names returned
+ by this function can be passed to QStringEncoder's and
+ QStringDecoder's constructor to create an en- or decoder for
+ the given codec.
+
+ This function may be used to obtain a listing of additional codecs beyond
+ the standard ones. Support for additional codecs requires Qt be compiled
+ with support for the ICU library.
+
+ \note The order of codecs is an internal implementation detail
+ and not guaranteed to be stable.
+ */
+QStringList QStringConverter::availableCodecs()
+{
+ auto availableCodec = [](qsizetype index) -> QString // maps a list position to the codec name reported for it
+ {
+ #if !QT_CONFIG(icu)
+ return QString::fromLatin1(encodingInterfaces[index].name);
+ #else
+ if (index == 0) // "Locale", not provided by icu
+ return QString::fromLatin1(
+ encodingInterfaces[QStringConverter::Encoding::System].name);
+ // this mirrors the setup we do to set a converter's name
+ UErrorCode status = U_ZERO_ERROR;
+ auto icuName = ucnv_getAvailableName(int32_t(index - 1)); // position 0 is taken by "Locale", hence the shift by one
+ const char *standardName = ucnv_getStandardName(icuName, "MIME", &status); // first choice: the MIME name
+ if (U_FAILURE(status) || !standardName) {
+ status = U_ZERO_ERROR;
+ standardName = ucnv_getStandardName(icuName, "IANA", &status); // second choice: the IANA name
+ }
+ if (!standardName)
+ standardName = icuName; // last resort: the name ICU itself uses for the converter
+ return QString::fromLatin1(standardName);
+ #endif
+ };
+
+ qsizetype codecCount = availableCodecCount();
+ QStringList result;
+ result.reserve(codecCount); // final size is known up front
+ for (qsizetype i = 0; i < codecCount; ++i)
+ result.push_back(availableCodec(i));
+ return result;
+}
+
/*!
Tries to determine the encoding of the HTML in \a data by looking at leading byte
order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
@@ -2131,7 +2441,7 @@ QStringDecoder QStringDecoder::decoderForHtml(QByteArrayView data)
return QStringDecoder(Utf8);
}
-
+#endif // !QT_BOOTSTRAPPED
/*!
Returns the canonical name for encoding \a e.
@@ -2200,12 +2510,14 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
*/
/*!
- \fn QByteArray QStringEncoder::encode(const QString &in)
- \fn QByteArray QStringEncoder::encode(QStringView in)
- \fn QByteArray QStringEncoder::operator()(const QString &in)
- \fn QByteArray QStringEncoder::operator()(QStringView in)
+ \fn QStringEncoder::DecodedData<const QString &> QStringEncoder::encode(const QString &in)
+ \fn QStringEncoder::DecodedData<QStringView> QStringEncoder::encode(QStringView in)
+ \fn QStringEncoder::DecodedData<const QString &> QStringEncoder::operator()(const QString &in)
+ \fn QStringEncoder::DecodedData<QStringView> QStringEncoder::operator()(QStringView in)
- Converts \a in and returns the data as a byte array.
+ Converts \a in and returns a struct that is implicitly convertible to QByteArray.
+
+ \snippet code/src_corelib_text_qstringconverter.cpp 5
*/
/*!
@@ -2289,12 +2601,15 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
*/
/*!
- \fn QString QStringDecoder::operator()(const QByteArray &ba)
- \fn QString QStringDecoder::decode(const QByteArray &ba)
- \fn QString QStringDecoder::operator()(QByteArrayView ba)
- \fn QString QStringDecoder::decode(QByteArrayView ba)
+ \fn QStringDecoder::EncodedData<const QByteArray &> QStringDecoder::operator()(const QByteArray &ba)
+ \fn QStringDecoder::EncodedData<const QByteArray &> QStringDecoder::decode(const QByteArray &ba)
+ \fn QStringDecoder::EncodedData<QByteArrayView> QStringDecoder::operator()(QByteArrayView ba)
+ \fn QStringDecoder::EncodedData<QByteArrayView> QStringDecoder::decode(QByteArrayView ba)
+
+ Converts \a ba and returns a struct that is implicitly convertible to QString.
- Converts \a ba and returns the data as a QString.
+
+ \snippet code/src_corelib_text_qstringconverter.cpp 4
*/
/*!
@@ -2319,4 +2634,10 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
\sa requiredSpace
*/
+/*!
+ \fn char16_t *QStringDecoder::appendToBuffer(char16_t *out, QByteArrayView in)
+ \since 6.6
+ \overload
+*/
+
QT_END_NAMESPACE