diff options
author | Lars Knoll <lars.knoll@qt.io> | 2020-04-23 11:34:01 +0200 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2020-05-14 07:47:53 +0200 |
commit | d8997ad797f8843e2632257586a5611e8dde300a (patch) | |
tree | b6a2a9bdc335839d11961bb9219601872435a736 /src/corelib/text/qstringconverter.cpp | |
parent | 5dcfd0ac2f3aa5c9f24022968827cc93d9839b45 (diff) |
Clean up QUtf16::convertTo/FromUnicode
Clean up the method, and refactor it so we can avoid one
copy of the data when using QStringConverter.
Make the conversion to unicode more efficient by avoiding conditions in
the inner loop and doing a memcpy if endianness matches.
Change-Id: I869daf861f886d69b67a1b223ac2238498b609ac
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 191 |
1 files changed, 94 insertions, 97 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 3676f4c8b5..d528df8cc8 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -773,118 +773,127 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s) QByteArray QUtf16::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) { - Q_ASSERT(state); - if (state->flags & QStringConverter::Flag::Stateless) // temporary - state = nullptr; - - bool writeBom = state && !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; + bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; qsizetype length = 2*len; if (writeBom) length += 2; + QByteArray d(length, Qt::Uninitialized); + char *end = convertFromUnicode(d.data(), QStringView(uc, len), state, endian); + Q_ASSERT(end - d.constData() == d.length()); + Q_UNUSED(end); + return d; +} + +char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian) +{ + Q_ASSERT(state); + bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; + if (endian == DetectEndianness) endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? 
BigEndianness : LittleEndianness; - QByteArray d; - d.resize(length); - char *data = d.data(); if (writeBom) { QChar bom(QChar::ByteOrderMark); if (endian == BigEndianness) - qToBigEndian(bom.unicode(), data); + qToBigEndian(bom.unicode(), out); else - qToLittleEndian(bom.unicode(), data); - data += 2; + qToLittleEndian(bom.unicode(), out); + out += 2; } if (endian == BigEndianness) - qToBigEndian<ushort>(uc, len, data); + qToBigEndian<ushort>(in.data(), in.length(), out); else - qToLittleEndian<ushort>(uc, len, data); + qToLittleEndian<ushort>(in.data(), in.length(), out); - if (state) { - state->remainingChars = 0; - state->internalState |= HeaderDone; - } - return d; + state->remainingChars = 0; + state->internalState |= HeaderDone; + return out + 2*in.length(); } QString QUtf16::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) { + QString result((len + 1) >> 1, Qt::Uninitialized); // worst case + QChar *qch = convertToUnicode(result.data(), chars, len, state, endian); + result.truncate(qch - result.constData()); + return result; +} + +QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) +{ Q_ASSERT(state); - if (state->flags & QStringConverter::Flag::Stateless) // temporary - state = nullptr; - bool half = false; - uchar buf = 0; - bool headerdone = state && state->internalState & HeaderDone; - if (state) { - if (state->flags & QStringConverter::Flag::DontSkipInitialBom) - headerdone = true; - if (endian == DetectEndianness) - endian = (DataEndianness)state->state_data[Endian]; - if (state->remainingChars) { - half = true; - buf = state->state_data[Data]; + if (endian == DetectEndianness) + endian = (DataEndianness)state->state_data[Endian]; + + const char *end = chars + len; + + // make sure we can decode at least one char + if (state->remainingChars + len < 2) { + if (len) { + Q_ASSERT(state->remainingChars == 0 && len == 
1); + state->remainingChars = 1; + state->state_data[Data] = *chars; } + return out; } - if (headerdone && endian == DetectEndianness) - endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; - QString result(len, Qt::Uninitialized); // worst case - QChar *qch = (QChar *)result.data(); - while (len--) { - if (half) { - QChar ch; - if (endian == LittleEndianness) { - ch.setRow(*chars++); - ch.setCell(buf); + bool headerdone = state && state->internalState & HeaderDone; + if (state->flags & QStringConverter::Flag::DontSkipInitialBom) + headerdone = true; + + if (!headerdone || state->remainingChars) { + uchar buf; + if (state->remainingChars) + buf = state->state_data[Data]; + else + buf = *chars++; + + // detect BOM, set endianness + state->internalState |= HeaderDone; + QChar ch(buf, *chars++); + if (endian == DetectEndianness) { + if (ch == QChar::ByteOrderSwapped) { + endian = BigEndianness; + } else if (ch == QChar::ByteOrderMark) { + endian = LittleEndianness; } else { - ch.setRow(buf); - ch.setCell(*chars++); - } - if (!headerdone) { - headerdone = true; - if (endian == DetectEndianness) { - if (ch == QChar::ByteOrderSwapped) { - endian = LittleEndianness; - } else if (ch == QChar::ByteOrderMark) { - endian = BigEndianness; - } else { - if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { - endian = BigEndianness; - } else { - endian = LittleEndianness; - ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8)); - } - *qch++ = ch; - } - } else if (ch != QChar::ByteOrderMark) { - *qch++ = ch; + if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { + endian = BigEndianness; + } else { + endian = LittleEndianness; } - } else { - *qch++ = ch; } - half = false; - } else { - buf = *chars++; - half = true; } + if (endian == BigEndianness) + ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8)); + if (headerdone || ch != QChar::ByteOrderMark) + *out++ = ch; + } else if (endian == DetectEndianness) { + 
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; } - result.truncate(qch - result.unicode()); - if (state) { - if (headerdone) - state->internalState |= HeaderDone; - state->state_data[Endian] = endian; - if (half) { - state->remainingChars = 1; - state->state_data[Data] = buf; + int nPairs = (end - chars) >> 1; + if (endian == BigEndianness) + qFromBigEndian<ushort>(chars, nPairs, out); + else + qFromLittleEndian<ushort>(chars, nPairs, out); + out += nPairs; + + state->state_data[Endian] = endian; + state->remainingChars = 0; + if ((end - chars) & 1) { + if (state->flags & QStringConverter::Flag::Stateless) { + *out++ = state->flags & QStringConverter::Flag::ConvertInvalidToNull ? QChar::Null : QChar::ReplacementCharacter; } else { - state->remainingChars = 0; - state->state_data[Data] = 0; + state->remainingChars = 1; + state->state_data[Data] = *(end - 1); } + } else { + state->state_data[Data] = 0; } - return result; + + return out; } QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) @@ -1253,44 +1262,32 @@ void QStringConverter::State::clear() static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) { - QString s = QUtf16::convertToUnicode(in, length, state); - memcpy(out, s.constData(), s.length()*sizeof(QChar)); - return out + s.length(); + return QUtf16::convertToUnicode(out, in, length, state, DetectEndianness); } static char *toUtf16(char *out, QStringView in, QStringConverter::State *state) { - QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state); - memcpy(out, s.constData(), s.length()); - return out + s.length(); + return QUtf16::convertFromUnicode(out, in, state, DetectEndianness); } static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) { - QString s = QUtf16::convertToUnicode(in, length, state, BigEndianness); - memcpy(out, 
s.constData(), s.length()*sizeof(QChar)); - return out + s.length(); + return QUtf16::convertToUnicode(out, in, length, state, BigEndianness); } static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state) { - QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, BigEndianness); - memcpy(out, s.constData(), s.length()); - return out + s.length(); + return QUtf16::convertFromUnicode(out, in, state, BigEndianness); } static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) { - QString s = QUtf16::convertToUnicode(in, length, state, LittleEndianness); - memcpy(out, s.constData(), s.length()*sizeof(QChar)); - return out + s.length(); + return QUtf16::convertToUnicode(out, in, length, state, LittleEndianness); } static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state) { - QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, LittleEndianness); - memcpy(out, s.constData(), s.length()); - return out + s.length(); + return QUtf16::convertFromUnicode(out, in, state, LittleEndianness); } static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) |