From fa8d021fa6fcb040fb702b6ffd2deee52a3b748a Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Thu, 30 Jul 2020 17:36:14 +0200 Subject: Convert a couple of APIs to use views Try to get rid of APIs that use raw 'const {char, QChar} *, length' pairs. Instead, use QByteArrayView or QStringView. As QStringConverter is a new class, simply change the API to what we'd like to have. Also adjust hidden API in QStringBuilder and friends. Change-Id: I897d47f63a7b965f5574a1e51da64147f9e981f6 Reviewed-by: Lars Knoll --- src/corelib/text/qstring.cpp | 20 ++-- src/corelib/text/qstringbuilder.cpp | 9 +- src/corelib/text/qstringbuilder.h | 12 +-- src/corelib/text/qstringconverter.cpp | 191 +++++++++++++++++++--------------- src/corelib/text/qstringconverter.h | 70 +++++-------- src/corelib/text/qstringconverter_p.h | 42 ++++---- 6 files changed, 171 insertions(+), 173 deletions(-) (limited to 'src/corelib/text') diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 5721a25106..2838e513f1 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1285,7 +1285,7 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens static int qt_compare_strings(QBasicUtf8StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept { if (cs == Qt::CaseSensitive) - return QUtf8::compareUtf8(lhs.data(), lhs.size(), rhs.data(), rhs.size()); + return QUtf8::compareUtf8(lhs, rhs); else return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); } @@ -5059,7 +5059,7 @@ static QByteArray qt_convert_to_utf8(QStringView str) if (str.isNull()) return QByteArray(); - return QUtf8::convertFromUnicode(str.data(), str.length()); + return QUtf8::convertFromUnicode(str); } /*! @@ -5210,7 +5210,7 @@ QString QString::fromLocal8Bit(QByteArrayView ba) if (ba.isEmpty()) return QString(DataPointer::fromRawData(&_empty, 0)); QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless); - return toUtf16(ba.data(), ba.size()); + return toUtf16(ba); } /*! \fn QString QString::fromUtf8(const char *str, qsizetype size) @@ -5263,7 +5263,7 @@ QString QString::fromUtf8(QByteArrayView ba) return QString(); if (ba.isEmpty()) return QString(DataPointer::fromRawData(&_empty, 0)); - return QUtf8::convertToUnicode(ba.data(), ba.size()); + return QUtf8::convertToUnicode(ba); } /*! @@ -5293,7 +5293,7 @@ QString QString::fromUtf16(const char16_t *unicode, qsizetype size) ++size; } QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless); - return toUtf16(reinterpret_cast(unicode), size*2); + return toUtf16(QByteArrayView(reinterpret_cast(unicode), size * 2)); } /*! @@ -5331,7 +5331,7 @@ QString QString::fromUcs4(const char32_t *unicode, qsizetype size) ++size; } QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless); - return toUtf16(reinterpret_cast(unicode), size*4); + return toUtf16(QByteArrayView(reinterpret_cast(unicode), size * 4)); } @@ -5951,7 +5951,7 @@ int QString::compare_helper(const QChar *data1, qsizetype length1, const char *d // ### make me nothrow in all cases QVarLengthArray s2(length2); const auto beg = reinterpret_cast(s2.data()); - const auto end = QUtf8::convertToUnicode(beg, data2, length2); + const auto end = QUtf8::convertToUnicode(beg, QByteArrayView(data2, length2)); return qt_compare_strings(QStringView(data1, length1), QStringView(beg, end - beg), cs); } @@ -6423,7 +6423,7 @@ static void append_utf8(QString &qs, const char *cs, int len) { const int oldSize = qs.size(); qs.resize(oldSize + len); - const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, cs, len); + const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len)); qs.resize(newEnd - qs.constData()); } @@ -10228,9 +10228,9 @@ QString QString::toHtmlEscaped() const /*! \internal */ -void QAbstractConcatenable::appendLatin1To(const char *a, int len, QChar *out) noexcept +void QAbstractConcatenable::appendLatin1To(QLatin1String in, QChar *out) noexcept { - qt_from_latin1(reinterpret_cast(out), a, size_t(len)); + qt_from_latin1(reinterpret_cast(out), in.data(), size_t(in.size())); } double QStringView::toDouble(bool *ok) const diff --git a/src/corelib/text/qstringbuilder.cpp b/src/corelib/text/qstringbuilder.cpp index 3016392e45..dcb3644fb0 100644 --- a/src/corelib/text/qstringbuilder.cpp +++ b/src/corelib/text/qstringbuilder.cpp @@ -124,14 +124,9 @@ QT_BEGIN_NAMESPACE /*! \internal */ -void QAbstractConcatenable::convertFromAscii(const char *a, int len, QChar *&out) noexcept +void QAbstractConcatenable::convertFromUtf8(QByteArrayView in, QChar *&out) noexcept { - if (Q_UNLIKELY(len == -1)) { - if (!a) - return; - len = int(strlen(a)); - } - out = QUtf8::convertToUnicode(out, a, len); + out = QUtf8::convertToUnicode(out, in); } QT_END_NAMESPACE diff --git a/src/corelib/text/qstringbuilder.h b/src/corelib/text/qstringbuilder.h index 4037fecf69..7d35a04762 100644 --- a/src/corelib/text/qstringbuilder.h +++ b/src/corelib/text/qstringbuilder.h @@ -58,12 +58,12 @@ QT_BEGIN_NAMESPACE struct Q_CORE_EXPORT QAbstractConcatenable { protected: - static void convertFromAscii(const char *a, int len, QChar *&out) noexcept; + static void convertFromUtf8(QByteArrayView in, QChar *&out) noexcept; static inline void convertFromAscii(char a, QChar *&out) noexcept { *out++ = QLatin1Char(a); } - static void appendLatin1To(const char *a, int len, QChar *out) noexcept; + static void appendLatin1To(QLatin1String in, QChar *out) noexcept; }; template struct QConcatenable {}; @@ -237,7 +237,7 @@ template <> struct QConcatenable : private QAbstractConcatenable static qsizetype size(const QLatin1String a) { return a.size(); } static inline void appendTo(const QLatin1String a, QChar *&out) { - appendLatin1To(a.latin1(), a.size(), out); + appendLatin1To(a, out); out += a.size(); } static inline void appendTo(const QLatin1String a, char *&out) @@ -288,7 +288,7 @@ template struct QConcatenable : private QAbstractConcaten #ifndef QT_NO_CAST_FROM_ASCII QT_ASCII_CAST_WARN static inline void appendTo(const char a[N], QChar *&out) { - QAbstractConcatenable::convertFromAscii(a, N - 1, out); + QAbstractConcatenable::convertFromUtf8(QByteArrayView(a, N - 1), out); } #endif static inline void appendTo(const char a[N], char *&out) @@ -311,7 +311,7 @@ template <> struct QConcatenable : private QAbstractConcatenable static qsizetype size(const char *a) { return qstrlen(a); } #ifndef QT_NO_CAST_FROM_ASCII QT_ASCII_CAST_WARN static inline void appendTo(const char *a, QChar *&out) - { QAbstractConcatenable::convertFromAscii(a, -1, out); } + { QAbstractConcatenable::convertFromUtf8(QByteArrayView(a), out); } #endif static inline void appendTo(const char *a, char *&out) { @@ -374,7 +374,7 @@ template <> struct QConcatenable : private QAbstractConcatenable #ifndef QT_NO_CAST_FROM_ASCII QT_ASCII_CAST_WARN static inline void appendTo(const QByteArray &ba, QChar *&out) { - QAbstractConcatenable::convertFromAscii(ba.constData(), ba.size(), out); + QAbstractConcatenable::convertFromUtf8(ba, out); } #endif static inline void appendTo(const QByteArray &ba, char *&out) diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 755c20d7aa..c276d807ec 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -484,12 +484,14 @@ static void simdCompareAscii(const char8_t *&, const char8_t *, const char16_t * enum { HeaderDone = 1 }; -QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len) +QByteArray QUtf8::convertFromUnicode(QStringView in) { + qsizetype len = in.size(); + // create a QByteArray with the worst case scenario size QByteArray result(len * 3, Qt::Uninitialized); uchar *dst = reinterpret_cast(const_cast(result.constData())); - const ushort *src = reinterpret_cast(uc); + const ushort *src = reinterpret_cast(in.data()); const ushort *const end = src + len; while (src != end) { @@ -511,10 +513,10 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len) return result; } -QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverterBase::State *state) +QByteArray QUtf8::convertFromUnicode(QStringView in, QStringConverterBase::State *state) { - QByteArray ba(3*len +3, Qt::Uninitialized); - char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state); + QByteArray ba(3*in.size() +3, Qt::Uninitialized); + char *end = convertFromUnicode(ba.data(), in, state); ba.truncate(end - ba.data()); return ba; } @@ -590,7 +592,7 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta return reinterpret_cast(cursor); } -QString QUtf8::convertToUnicode(const char *chars, qsizetype len) +QString QUtf8::convertToUnicode(QByteArrayView in) { // UTF-8 to UTF-16 always needs the exact same number of words or less: // UTF-8 UTF-16 @@ -604,9 +606,9 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len) // // The table holds for invalid sequences too: we'll insert one replacement char // per invalid byte. - QString result(len, Qt::Uninitialized); + QString result(in.size(), Qt::Uninitialized); QChar *data = const_cast(result.constData()); // we know we're not shared - const QChar *end = convertToUnicode(data, chars, len); + const QChar *end = convertToUnicode(data, in); result.truncate(end - data); return result; } @@ -615,10 +617,10 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len) \since 5.7 \overload - Converts the UTF-8 sequence of \a len octets beginning at \a chars to - a sequence of QChar starting at \a buffer. The buffer is expected to be - large enough to hold the result. An upper bound for the size of the - buffer is \a len QChars. + Converts the UTF-8 sequence of bytes viewed by \a in to a sequence of + QChar starting at \a buffer. The buffer is expected to be large enough + to hold the result. An upper bound for the size of the buffer is + \c in.size() QChars. If, during decoding, an error occurs, a QChar::ReplacementCharacter is written. @@ -628,18 +630,19 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len) This function never throws. */ -QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, qsizetype len) noexcept +QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept { ushort *dst = reinterpret_cast(buffer); - const uchar *src = reinterpret_cast(chars); - const uchar *end = src + len; + const uchar *const start = reinterpret_cast(in.data()); + const uchar *src = start; + const uchar *end = src + in.size(); // attempt to do a full decoding in SIMD const uchar *nextAscii = end; if (!simdDecodeAscii(dst, nextAscii, src, end)) { // at least one non-ASCII entry // check if we failed to decode the UTF-8 BOM; if so, skip it - if (Q_UNLIKELY(src == reinterpret_cast(chars)) + if (Q_UNLIKELY(src == start) && end - src >= 3 && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) { src += 3; @@ -664,7 +667,7 @@ QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, qsizetype len) return reinterpret_cast(dst); } -QString QUtf8::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) +QString QUtf8::convertToUnicode(QByteArrayView in, QStringConverter::State *state) { // See above for buffer requirements for stateless decoding. However, that // fails if the state is not empty. The following situations can add to the @@ -676,14 +679,16 @@ QString QUtf8::convertToUnicode(const char *chars, qsizetype len, QStringConvert // 1 of 2 bytes invalid continuation +1 (need to insert replacement and restart) // 2 of 3 bytes same +1 (same) // 3 of 4 bytes same +1 (same) - QString result(len + 1, Qt::Uninitialized); - QChar *end = convertToUnicode(result.data(), chars, len, state); + QString result(in.size() + 1, Qt::Uninitialized); + QChar *end = convertToUnicode(result.data(), in, state); result.truncate(end - result.constData()); return result; } -QChar *QUtf8::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state) +QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state) { + qsizetype len = in.size(); + Q_ASSERT(state); if (!len) return out; @@ -697,7 +702,7 @@ QChar *QUtf8::convertToUnicode(QChar *out, const char *chars, qsizetype len, QSt uchar ch = 0; ushort *dst = reinterpret_cast(out); - const uchar *src = reinterpret_cast(chars); + const uchar *src = reinterpret_cast(in.data()); const uchar *end = src + len; if (!(state->flags & QStringConverter::Flag::Stateless)) { @@ -790,10 +795,10 @@ struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii static void appendUcs4(const NoOutput &, uint) {} }; -QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len) +QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in) { - const uchar *src = reinterpret_cast(chars); - const uchar *end = src + len; + const uchar *src = reinterpret_cast(in.data()); + const uchar *end = src + in.size(); const uchar *nextAscii = src; bool isValidAscii = true; @@ -821,12 +826,12 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len) return { true, isValidAscii }; } -int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept +int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept { - auto src1 = reinterpret_cast(utf8); - auto end1 = src1 + u8len; - auto src2 = reinterpret_cast(utf16); - auto end2 = src2 + u16len; + auto src1 = reinterpret_cast(utf8.data()); + auto end1 = src1 + utf8.size(); + auto src2 = reinterpret_cast(utf16.data()); + auto end2 = src2 + utf16.size(); do { simdCompareAscii(src1, end1, src2, end2); @@ -858,11 +863,11 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qs return (end1 > src1) - int(end2 > src2); } -int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s) +int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1String s) { uint uc1 = QChar::Null; - auto src1 = reinterpret_cast(utf8); - auto end1 = src1 + u8len; + auto src1 = reinterpret_cast(utf8.data()); + auto end1 = src1 + utf8.size(); auto src2 = reinterpret_cast(s.latin1()); auto end2 = src2 + s.size(); @@ -884,15 +889,15 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s) return (end1 > src1) - (end2 > src2); } -QByteArray QUtf16::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QByteArray QUtf16::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian) { bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; - qsizetype length = 2*len; + qsizetype length = 2 * in.size(); if (writeBom) length += 2; QByteArray d(length, Qt::Uninitialized); - char *end = convertFromUnicode(d.data(), QStringView(uc, len), state, endian); + char *end = convertFromUnicode(d.data(), in, state, endian); Q_ASSERT(end - d.constData() == d.length()); Q_UNUSED(end); return d; @@ -924,16 +929,19 @@ char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::St return out + 2*in.length(); } -QString QUtf16::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QString QUtf16::convertToUnicode(QByteArrayView in, QStringConverter::State *state, DataEndianness endian) { - QString result((len + 1) >> 1, Qt::Uninitialized); // worst case - QChar *qch = convertToUnicode(result.data(), chars, len, state, endian); + QString result((in.size() + 1) >> 1, Qt::Uninitialized); // worst case + QChar *qch = convertToUnicode(result.data(), in, state, endian); result.truncate(qch - result.constData()); return result; } -QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QChar *QUtf16::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state, DataEndianness endian) { + qsizetype len = in.size(); + const char *chars = in.data(); + Q_ASSERT(state); if (endian == DetectEndianness) @@ -1009,14 +1017,14 @@ QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QS return out; } -QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QByteArray QUtf32::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian) { bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; - int length = 4*len; + int length = 4*in.size(); if (writeBom) length += 4; QByteArray ba(length, Qt::Uninitialized); - char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state, endian); + char *end = convertFromUnicode(ba.data(), in, state, endian); Q_ASSERT(end - ba.constData() == length); Q_UNUSED(end); return ba; @@ -1093,17 +1101,20 @@ decode_surrogate: return out; } -QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QString QUtf32::convertToUnicode(QByteArrayView in, QStringConverter::State *state, DataEndianness endian) { QString result; - result.resize((len + 7) >> 1); // worst case - QChar *end = convertToUnicode(result.data(), chars, len, state, endian); + result.resize((in.size() + 7) >> 1); // worst case + QChar *end = convertToUnicode(result.data(), in, state, endian); result.truncate(end - result.constData()); return result; } -QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) +QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state, DataEndianness endian) { + qsizetype len = in.size(); + const char *chars = in.data(); + Q_ASSERT(state); if (endian == DetectEndianness) endian = (DataEndianness)state->state_data[Endian]; @@ -1188,8 +1199,11 @@ QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QS } #if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED) -static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, QStringConverter::State *state) +static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state) { + qsizetype length = in.size(); + const char *chars = in.data(); + Q_ASSERT(state); if (state->flags & QStringConverter::Flag::Stateless) // temporary state = nullptr; @@ -1238,10 +1252,12 @@ static QString convertToUnicodeCharByChar(const char *chars, qsizetype length, Q } -QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStringConverter::State *state) +QString QLocal8Bit::convertToUnicode(QByteArrayView in, QStringConverter::State *state) { + qsizetype length = in.size(); + Q_ASSERT(length < INT_MAX); // ### FIXME - const char *mb = chars; + const char *mb = in.data(); int mblen = length; if (!mb || !mblen) @@ -1294,7 +1310,7 @@ QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStrin mblen--; //check whether, we hit an invalid character in the middle if ((mblen <= 1) || (remainingChars && state_data)) - return convertToUnicodeCharByChar(chars, length, state); + return convertToUnicodeCharByChar(in, state); //Remove the last character and try again... state_data = mb[mblen-1]; remainingChars = 1; @@ -1324,8 +1340,11 @@ QString QLocal8Bit::convertToUnicode(const char *chars, qsizetype length, QStrin return s; } -QByteArray QLocal8Bit::convertFromUnicode(const QChar *ch, qsizetype uclen, QStringConverter::State *state) +QByteArray QLocal8Bit::convertFromUnicode(QStringView in, QStringConverter::State *state) { + const QChar *ch = in.data(); + qsizetype uclen = in.size(); + Q_ASSERT(uclen < INT_MAX); // ### FIXME Q_ASSERT(state); Q_UNUSED(state); // ### Fixme @@ -1375,9 +1394,9 @@ void QStringConverter::State::clear() internalState = 0; } -static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf16(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf16::convertToUnicode(out, in, length, state, DetectEndianness); + return QUtf16::convertToUnicode(out, in, state, DetectEndianness); } static char *toUtf16(char *out, QStringView in, QStringConverter::State *state) @@ -1385,9 +1404,9 @@ static char *toUtf16(char *out, QStringView in, QStringConverter::State *state) return QUtf16::convertFromUnicode(out, in, state, DetectEndianness); } -static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf16BE(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf16::convertToUnicode(out, in, length, state, BigEndianness); + return QUtf16::convertToUnicode(out, in, state, BigEndianness); } static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state) @@ -1395,9 +1414,9 @@ static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state return QUtf16::convertFromUnicode(out, in, state, BigEndianness); } -static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf16LE(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf16::convertToUnicode(out, in, length, state, LittleEndianness); + return QUtf16::convertToUnicode(out, in, state, LittleEndianness); } static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state) @@ -1405,9 +1424,9 @@ static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state return QUtf16::convertFromUnicode(out, in, state, LittleEndianness); } -static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf32(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf32::convertToUnicode(out, in, length, state, DetectEndianness); + return QUtf32::convertToUnicode(out, in, state, DetectEndianness); } static char *toUtf32(char *out, QStringView in, QStringConverter::State *state) @@ -1415,9 +1434,9 @@ static char *toUtf32(char *out, QStringView in, QStringConverter::State *state) return QUtf32::convertFromUnicode(out, in, state, DetectEndianness); } -static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf32BE(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf32::convertToUnicode(out, in, length, state, BigEndianness); + return QUtf32::convertToUnicode(out, in, state, BigEndianness); } static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state) @@ -1425,9 +1444,9 @@ static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state return QUtf32::convertFromUnicode(out, in, state, BigEndianness); } -static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromUtf32LE(QChar *out, QByteArrayView in, QStringConverter::State *state) { - return QUtf32::convertToUnicode(out, in, length, state, LittleEndianness); + return QUtf32::convertToUnicode(out, in, state, LittleEndianness); } static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state) @@ -1437,13 +1456,13 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept; -static QChar *fromLatin1(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state) +static QChar *fromLatin1(QChar *out, QByteArrayView in, QStringConverter::State *state) { Q_ASSERT(state); Q_UNUSED(state); - qt_from_latin1(reinterpret_cast(out), chars, size_t(len)); - return out + len; + qt_from_latin1(reinterpret_cast(out), in.data(), size_t(in.size())); + return out + in.size(); } @@ -1469,16 +1488,16 @@ static char *toLatin1(char *out, QStringView in, QStringConverter::State *state) return out; } -static QChar *fromLocal8Bit(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) +static QChar *fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state) { - QString s = QLocal8Bit::convertToUnicode(in, length, state); + QString s = QLocal8Bit::convertToUnicode(in, state); memcpy(out, s.constData(), s.length()*sizeof(QChar)); return out + s.length(); } static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *state) { - QByteArray s = QLocal8Bit::convertFromUnicode(in.data(), in.length(), state); + QByteArray s = QLocal8Bit::convertFromUnicode(in, state); memcpy(out, s.constData(), s.length()); return out + s.length(); } @@ -1727,16 +1746,17 @@ std::optional QStringConverter::encodingForName(cons } /*! - Returns the encoding for the content of \a buf if it can be determined. + Returns the encoding for the content of \a data if it can be determined. \a expectedFirstCharacter can be passed as an additional hint to help determine the encoding. The returned optional is empty, if the encoding is unclear. */ -std::optional QStringConverter::encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter) +std::optional QStringConverter::encodingForData(QByteArrayView data, char16_t expectedFirstCharacter) { + qsizetype arraySize = data.size(); if (arraySize > 3) { - uint uc = qFromUnaligned(buf); + uint uc = qFromUnaligned(data.data()); if (uc == qToBigEndian(uint(QChar::ByteOrderMark))) return QStringConverter::Utf32BE; if (uc == qToLittleEndian(uint(QChar::ByteOrderMark))) @@ -1751,12 +1771,12 @@ std::optional QStringConverter::encodingForData(cons } if (arraySize > 2) { - if (memcmp(buf, utf8bom, sizeof(utf8bom)) == 0) + if (memcmp(data.data(), utf8bom, sizeof(utf8bom)) == 0) return QStringConverter::Utf8; } if (arraySize > 1) { - ushort uc = qFromUnaligned(buf); + ushort uc = qFromUnaligned(data.data()); if (uc == qToBigEndian(ushort(QChar::ByteOrderMark))) return QStringConverter::Utf16BE; if (uc == qToLittleEndian(ushort(QChar::ByteOrderMark))) @@ -1773,19 +1793,20 @@ std::optional QStringConverter::encodingForData(cons } /*! - Tries to determine the encoding of the HTML in \a buf by looking at leading byte order marks or - a charset specifier in the HTML meta tag. If the optional is empty, the encoding specified is - not supported by QStringConverter. If no encoding is detected, the method returns Utf8. - */ -std::optional QStringConverter::encodingForHtml(const char *buf, qsizetype arraySize) + Tries to determine the encoding of the HTML in \a data by looking at leading byte + order marks or a charset specifier in the HTML meta tag. If the optional is empty, + the encoding specified is not supported by QStringConverter. If no encoding is + detected, the method returns Utf8. +*/ +std::optional QStringConverter::encodingForHtml(QByteArrayView data) { // determine charset - auto encoding = encodingForData(buf, arraySize); + auto encoding = encodingForData(data); if (encoding) // trust the initial BOM return encoding; - QByteArray header = QByteArray(buf, qMin(arraySize, qsizetype(1024))).toLower(); + QByteArray header = data.first(qMin(data.size(), qsizetype(1024))).toByteArray().toLower(); int pos = header.indexOf("meta "); if (pos != -1) { pos = header.indexOf("charset=", pos); @@ -2020,14 +2041,14 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e) */ /*! - \fn QChar *QStringDecoder::appendToBuffer(QChar *out, const char *in, qsizetype length) + \fn QChar *QStringDecoder::appendToBuffer(QChar *out, QByteArrayView in) - Decodes \a length bytes from \a in and writes the decoded result into the buffer - starting at \a out. Returns a pointer to the end of data written. + Decodes the sequence of bytes viewed by \a in and writes the decoded result into + the buffer starting at \a out. Returns a pointer to the end of data written. \a out needs to be large enough to be able to hold all the decoded data. Use \l{requiredSpace} to determine the maximum size requirements to decode an encoded - data buffer of \a length. + data buffer of \c in.size() bytes. \sa requiredSpace */ diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h index 9eba8bc437..829b19d568 100644 --- a/src/corelib/text/qstringconverter.h +++ b/src/corelib/text/qstringconverter.h @@ -142,8 +142,7 @@ protected: struct Interface { - // ### FIXME: need a QByteArrayView - using DecoderFn = QChar * (*)(QChar *out, const char *in, qsizetype length, State *state); + using DecoderFn = QChar * (*)(QChar *out, QByteArrayView in, State *state); using LengthFn = qsizetype (*)(qsizetype inLength); using EncoderFn = char * (*)(char *out, QStringView in, State *state); const char *name = nullptr; @@ -179,8 +178,8 @@ public: Q_CORE_EXPORT static std::optional encodingForName(const char *name); Q_CORE_EXPORT static const char *nameForEncoding(Encoding e); - Q_CORE_EXPORT static std::optional encodingForData(const char *buf, qsizetype arraySize, char16_t expectedFirstCharacter = 0); - Q_CORE_EXPORT static std::optional encodingForHtml(const char *buf, qsizetype arraySize); + Q_CORE_EXPORT static std::optional encodingForData(QByteArrayView data, char16_t expectedFirstCharacter = 0); + Q_CORE_EXPORT static std::optional encodingForHtml(QByteArrayView data); protected: const Interface *iface; @@ -209,36 +208,32 @@ public: #if defined(Q_QDOC) QByteArray operator()(const QString &in); QByteArray operator()(QStringView in); - QByteArray operator()(const QChar *in, qsizetype length); QByteArray encode(const QString &in); QByteArray encode(QStringView in); - QByteArray encode(const QChar *in, qsizetype length); #else template struct DecodedData { QStringEncoder *encoder; T data; - operator QByteArray() const { return encoder->encodeAsByteArray(QStringView(data)); } + operator QByteArray() const { return encoder->encodeAsByteArray(data); } }; + Q_WEAK_OVERLOAD DecodedData operator()(const QString &str) { return DecodedData{this, str}; } DecodedData operator()(QStringView in) { return DecodedData{this, in}; } - DecodedData operator()(const QChar *in, qsizetype length) - { return (*this)(QStringView(in, length)); } + Q_WEAK_OVERLOAD DecodedData encode(const QString &str) { return DecodedData{this, str}; } DecodedData encode(QStringView in) { return DecodedData{this, in}; } - DecodedData encode(const QChar *in, qsizetype length) - { return (*this)(QStringView(in, length)); } #endif qsizetype requiredSpace(qsizetype inputLength) const { return iface->fromUtf16Len(inputLength); } - char *appendToBuffer(char *out, const QChar *in, qsizetype length) - { return iface->fromUtf16(out, QStringView(in, length), &state); } + char *appendToBuffer(char *out, QStringView in) + { return iface->fromUtf16(out, in, &state); } private: QByteArray encodeAsByteArray(QStringView in) { @@ -253,13 +248,6 @@ private: class QStringDecoder : public QStringConverter { - struct View { - const char *ch; - qsizetype l; - const char *data() const { return ch; } - qsizetype length() const { return l; } - }; - protected: QSTRINGCONVERTER_CONSTEXPR QStringDecoder(const Interface *i) : QStringConverter(i) @@ -277,44 +265,38 @@ public: #if defined(Q_QDOC) QString operator()(const QByteArray &ba); - QString operator()(const char *in, qsizetype size); - QString operator()(const char *chars); + QString operator()(QByteArrayView ba); QString decode(const QByteArray &ba); - QString decode(const char *in, qsizetype size); - QString decode(const char *chars); + QString decode(QByteArrayView ba); #else template struct EncodedData { QStringDecoder *decoder; T data; - operator QString() const { return decoder->decodeAsString(data.data(), data.length()); } + operator QString() const { return decoder->decodeAsString(data); } }; + Q_WEAK_OVERLOAD EncodedData operator()(const QByteArray &ba) { return EncodedData{this, ba}; } - EncodedData operator()(const char *in, qsizetype length) - { return EncodedData{this, {in, length}}; } - EncodedData operator()(const char *chars) - { return EncodedData{this, {chars, qsizetype(strlen(chars))}}; } + EncodedData operator()(QByteArrayView ba) + { return EncodedData{this, ba}; } + Q_WEAK_OVERLOAD EncodedData decode(const QByteArray &ba) { return EncodedData{this, ba}; } - EncodedData decode(const char *in, qsizetype length) - { return EncodedData{this, {in, length}}; } - EncodedData decode(const char *chars) - { return EncodedData{this, {chars, qsizetype(strlen(chars))}}; } + EncodedData decode(QByteArrayView ba) + { return EncodedData{this, ba}; } #endif qsizetype requiredSpace(qsizetype inputLength) const { return iface->toUtf16Len(inputLength); } - QChar *appendToBuffer(QChar *out, const char *in, qsizetype length) - { return iface->toUtf16(out, in, length, &state); } + QChar *appendToBuffer(QChar *out, QByteArrayView ba) + { return iface->toUtf16(out, ba, &state); } private: - QString decodeAsString(const char *in, qsizetype length) + QString decodeAsString(QByteArrayView in) { - QString result(iface->toUtf16Len(length), Qt::Uninitialized); - QChar *out = result.data(); - // ### Fixme: state handling needs to be moved into the conversion methods - out = iface->toUtf16(out, in, length, &state); + QString result(iface->toUtf16Len(in.size()), Qt::Uninitialized); + const QChar *out = iface->toUtf16(result.data(), in, &state); result.truncate(out - result.constData()); return result; } @@ -329,10 +311,10 @@ struct QConcatenable> typedef QChar type; typedef QString ConvertTo; enum { ExactSize = false }; - static qsizetype size(const QStringDecoder::EncodedData &s) { return s.decoder->requiredSpace(s.data.length()); } + static qsizetype size(const QStringDecoder::EncodedData &s) { return s.decoder->requiredSpace(s.data.size()); } static inline void appendTo(const QStringDecoder::EncodedData &s, QChar *&out) { - out = s.decoder->appendToBuffer(out, s.data.data(), s.data.length()); + out = s.decoder->appendToBuffer(out, s.data); } }; @@ -343,10 +325,10 @@ struct QConcatenable> typedef char type; typedef QByteArray ConvertTo; enum { ExactSize = false }; - static qsizetype size(const QStringEncoder::DecodedData &s) { return s.encoder->requiredSpace(s.data.length()); } + static qsizetype size(const QStringEncoder::DecodedData &s) { return s.encoder->requiredSpace(s.data.size()); } static inline void appendTo(const QStringEncoder::DecodedData &s, char *&out) { - out = s.encoder->appendToBuffer(out, s.data.data(), s.data.length()); + out = s.encoder->appendToBuffer(out, s.data); } }; diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index 2130bea6e8..242f3f0303 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -326,48 +326,48 @@ enum DataEndianness struct QUtf8 { - Q_CORE_EXPORT static QChar *convertToUnicode(QChar *, const char *, qsizetype) noexcept; - static QString convertToUnicode(const char *, qsizetype); - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); - static QChar *convertToUnicode(QChar *out, const char *in, qsizetype length, QStringConverter::State *state); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); + Q_CORE_EXPORT static QChar *convertToUnicode(QChar *buffer, QByteArrayView in) noexcept; + static QString convertToUnicode(QByteArrayView in); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state); + static QChar *convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state); struct ValidUtf8Result { bool isValidUtf8; bool isValidAscii; }; - static ValidUtf8Result isValidUtf8(const char *, qsizetype); - static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept; - static int compareUtf8(const char *, qsizetype, QLatin1String s); + static ValidUtf8Result isValidUtf8(QByteArrayView in); + static int compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept; + static int compareUtf8(QByteArrayView utf8, QLatin1String s); }; struct QUtf16 { - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); - static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness); + static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); }; struct QUtf32 { - static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); + static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); }; struct Q_CORE_EXPORT QLocal8Bit { #if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED) - static QString convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) - { return QUtf8::convertToUnicode(chars, len, state); } - static QByteArray convertFromUnicode(const QChar *chars, qsizetype len, QStringConverter::State *state) - { return QUtf8::convertFromUnicode(chars, len, state); } + static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state) + { return QUtf8::convertToUnicode(in, state); } + static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) + { return QUtf8::convertFromUnicode(in, state); } #else - static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); - static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); + static QString convertToUnicode(QByteArrayView, QStringConverter::State *); + static QByteArray convertFromUnicode(QStringView, QStringConverter::State *); #endif }; -- cgit v1.2.3