summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qstringconverter.cpp
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-04-23 11:34:01 +0200
committerLars Knoll <lars.knoll@qt.io>2020-05-14 07:47:53 +0200
commitd8997ad797f8843e2632257586a5611e8dde300a (patch)
treeb6a2a9bdc335839d11961bb9219601872435a736 /src/corelib/text/qstringconverter.cpp
parent5dcfd0ac2f3aa5c9f24022968827cc93d9839b45 (diff)
Clean up QUtf16::convertTo/FromUnicode
Clean up the method, and refactor it so we can avoid one copy of the data when using QStringConverter. Make the conversion to unicode more by avoiding conditions in the inner loop and doing a memcpy if endianness matches. Change-Id: I869daf861f886d69b67a1b223ac2238498b609ac Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r--src/corelib/text/qstringconverter.cpp191
1 files changed, 94 insertions, 97 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 3676f4c8b5..d528df8cc8 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -773,118 +773,127 @@ int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s)
QByteArray QUtf16::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{
- Q_ASSERT(state);
- if (state->flags & QStringConverter::Flag::Stateless) // temporary
- state = nullptr;
-
- bool writeBom = state && !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
+ bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
qsizetype length = 2*len;
if (writeBom)
length += 2;
+ QByteArray d(length, Qt::Uninitialized);
+ char *end = convertFromUnicode(d.data(), QStringView(uc, len), state, endian);
+ Q_ASSERT(end - d.constData() == d.length());
+ Q_UNUSED(end);
+ return d;
+}
+
+char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian)
+{
+ Q_ASSERT(state);
+ bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
+
if (endian == DetectEndianness)
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
- QByteArray d;
- d.resize(length);
- char *data = d.data();
if (writeBom) {
QChar bom(QChar::ByteOrderMark);
if (endian == BigEndianness)
- qToBigEndian(bom.unicode(), data);
+ qToBigEndian(bom.unicode(), out);
else
- qToLittleEndian(bom.unicode(), data);
- data += 2;
+ qToLittleEndian(bom.unicode(), out);
+ out += 2;
}
if (endian == BigEndianness)
- qToBigEndian<ushort>(uc, len, data);
+ qToBigEndian<ushort>(in.data(), in.length(), out);
else
- qToLittleEndian<ushort>(uc, len, data);
+ qToLittleEndian<ushort>(in.data(), in.length(), out);
- if (state) {
- state->remainingChars = 0;
- state->internalState |= HeaderDone;
- }
- return d;
+ state->remainingChars = 0;
+ state->internalState |= HeaderDone;
+ return out + 2*in.length();
}
QString QUtf16::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{
+ QString result((len + 1) >> 1, Qt::Uninitialized); // worst case
+ QChar *qch = convertToUnicode(result.data(), chars, len, state, endian);
+ result.truncate(qch - result.constData());
+ return result;
+}
+
+QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
+{
Q_ASSERT(state);
- if (state->flags & QStringConverter::Flag::Stateless) // temporary
- state = nullptr;
- bool half = false;
- uchar buf = 0;
- bool headerdone = state && state->internalState & HeaderDone;
- if (state) {
- if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
- headerdone = true;
- if (endian == DetectEndianness)
- endian = (DataEndianness)state->state_data[Endian];
- if (state->remainingChars) {
- half = true;
- buf = state->state_data[Data];
+ if (endian == DetectEndianness)
+ endian = (DataEndianness)state->state_data[Endian];
+
+ const char *end = chars + len;
+
+ // make sure we can decode at least one char
+ if (state->remainingChars + len < 2) {
+ if (len) {
+ Q_ASSERT(state->remainingChars == 0 && len == 1);
+ state->remainingChars = 1;
+ state->state_data[Data] = *chars;
}
+ return out;
}
- if (headerdone && endian == DetectEndianness)
- endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
- QString result(len, Qt::Uninitialized); // worst case
- QChar *qch = (QChar *)result.data();
- while (len--) {
- if (half) {
- QChar ch;
- if (endian == LittleEndianness) {
- ch.setRow(*chars++);
- ch.setCell(buf);
+ bool headerdone = state && state->internalState & HeaderDone;
+ if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
+ headerdone = true;
+
+ if (!headerdone || state->remainingChars) {
+ uchar buf;
+ if (state->remainingChars)
+ buf = state->state_data[Data];
+ else
+ buf = *chars++;
+
+ // detect BOM, set endianness
+ state->internalState |= HeaderDone;
+ QChar ch(buf, *chars++);
+ if (endian == DetectEndianness) {
+ if (ch == QChar::ByteOrderSwapped) {
+ endian = BigEndianness;
+ } else if (ch == QChar::ByteOrderMark) {
+ endian = LittleEndianness;
} else {
- ch.setRow(buf);
- ch.setCell(*chars++);
- }
- if (!headerdone) {
- headerdone = true;
- if (endian == DetectEndianness) {
- if (ch == QChar::ByteOrderSwapped) {
- endian = LittleEndianness;
- } else if (ch == QChar::ByteOrderMark) {
- endian = BigEndianness;
- } else {
- if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
- endian = BigEndianness;
- } else {
- endian = LittleEndianness;
- ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
- }
- *qch++ = ch;
- }
- } else if (ch != QChar::ByteOrderMark) {
- *qch++ = ch;
+ if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
+ endian = BigEndianness;
+ } else {
+ endian = LittleEndianness;
}
- } else {
- *qch++ = ch;
}
- half = false;
- } else {
- buf = *chars++;
- half = true;
}
+ if (endian == BigEndianness)
+ ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
+ if (headerdone || ch != QChar::ByteOrderMark)
+ *out++ = ch;
+ } else if (endian == DetectEndianness) {
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
}
- result.truncate(qch - result.unicode());
- if (state) {
- if (headerdone)
- state->internalState |= HeaderDone;
- state->state_data[Endian] = endian;
- if (half) {
- state->remainingChars = 1;
- state->state_data[Data] = buf;
+ int nPairs = (end - chars) >> 1;
+ if (endian == BigEndianness)
+ qFromBigEndian<ushort>(chars, nPairs, out);
+ else
+ qFromLittleEndian<ushort>(chars, nPairs, out);
+ out += nPairs;
+
+ state->state_data[Endian] = endian;
+ state->remainingChars = 0;
+ if ((end - chars) & 1) {
+ if (state->flags & QStringConverter::Flag::Stateless) {
+ *out++ = state->flags & QStringConverter::Flag::ConvertInvalidToNull ? QChar::Null : QChar::ReplacementCharacter;
} else {
- state->remainingChars = 0;
- state->state_data[Data] = 0;
+ state->remainingChars = 1;
+ state->state_data[Data] = *(end - 1);
}
+ } else {
+ state->state_data[Data] = 0;
}
- return result;
+
+ return out;
}
QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian)
@@ -1253,44 +1262,32 @@ void QStringConverter::State::clear()
static QChar *fromUtf16(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
- QString s = QUtf16::convertToUnicode(in, length, state);
- memcpy(out, s.constData(), s.length()*sizeof(QChar));
- return out + s.length();
+ return QUtf16::convertToUnicode(out, in, length, state, DetectEndianness);
}
static char *toUtf16(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf16::convertFromUnicode(out, in, state, DetectEndianness);
}
static QChar *fromUtf16BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
- QString s = QUtf16::convertToUnicode(in, length, state, BigEndianness);
- memcpy(out, s.constData(), s.length()*sizeof(QChar));
- return out + s.length();
+ return QUtf16::convertToUnicode(out, in, length, state, BigEndianness);
}
static char *toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, BigEndianness);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf16::convertFromUnicode(out, in, state, BigEndianness);
}
static QChar *fromUtf16LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
- QString s = QUtf16::convertToUnicode(in, length, state, LittleEndianness);
- memcpy(out, s.constData(), s.length()*sizeof(QChar));
- return out + s.length();
+ return QUtf16::convertToUnicode(out, in, length, state, LittleEndianness);
}
static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf16::convertFromUnicode(in.data(), in.length(), state, LittleEndianness);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf16::convertFromUnicode(out, in, state, LittleEndianness);
}
static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)