summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Lars Knoll <lars.knoll@qt.io> 2020-04-23 12:38:00 +0200
committer Lars Knoll <lars.knoll@qt.io> 2020-05-14 07:48:00 +0200
commit b1d8ce32cd4dc7dc021aa9f0c05048753f04dd6c (patch)
tree 06dd97e10cd11c3e1044fe0e1627d0c1fc5a9608
parent d8997ad797f8843e2632257586a5611e8dde300a (diff)
Refactor QUtf32::convertFromUnicode
Implement proper state handling, and avoid a copy when using it through QStringConverter.

Change-Id: I201fe966601c424c337e452e359a2e71f76354ad
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- src/corelib/text/qstringconverter.cpp 104
-rw-r--r-- src/corelib/text/qstringconverter_p.h 1
2 files changed, 65 insertions, 40 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index d528df8cc8..e87b4704c0 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -898,11 +898,22 @@ QChar *QUtf16::convertToUnicode(QChar *out, const char *chars, qsizetype len, QS
QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{
+ bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
+ int length = 4*len;
+ if (writeBom)
+ length += 4;
+ QByteArray ba(length, Qt::Uninitialized);
+ char *end = convertFromUnicode(ba.data(), uc, len, state, endian);
+ Q_ASSERT(end - ba.constData() == length);
+ Q_UNUSED(end);
+ return ba;
+}
+
+char *QUtf32::convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian)
+{
Q_ASSERT(state);
- if (state->flags & QStringConverter::Flag::Stateless) // temporary
- state = nullptr;
- bool writeBom = state && !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
+ bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
qsizetype length = 4*len;
if (writeBom)
length += 4;
@@ -910,43 +921,62 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringCon
if (endian == DetectEndianness)
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
- QByteArray d(length, Qt::Uninitialized);
- char *data = d.data();
if (writeBom) {
if (endian == BigEndianness) {
- data[0] = 0;
- data[1] = 0;
- data[2] = (char)0xfe;
- data[3] = (char)0xff;
+ out[0] = 0;
+ out[1] = 0;
+ out[2] = (char)0xfe;
+ out[3] = (char)0xff;
} else {
- data[0] = (char)0xff;
- data[1] = (char)0xfe;
- data[2] = 0;
- data[3] = 0;
+ out[0] = (char)0xff;
+ out[1] = (char)0xfe;
+ out[2] = 0;
+ out[3] = 0;
}
- data += 4;
+ out += 4;
+ state->internalState |= HeaderDone;
}
- QStringIterator i(uc, uc + len);
- if (endian == BigEndianness) {
- while (i.hasNext()) {
- uint cp = i.next();
- qToBigEndian(cp, data);
- data += 4;
- }
- } else {
- while (i.hasNext()) {
- uint cp = i.next();
- qToLittleEndian(cp, data);
- data += 4;
- }
+ const QChar *end = uc + len;
+ QChar ch;
+ uint ucs4;
+ if (state->remainingChars == 1) {
+ ch = state->state_data[Data];
+ // this is ugly, but shortcuts a whole lot of logic that would otherwise be required
+ state->remainingChars = 0;
+ goto decode_surrogate;
}
- if (state) {
- state->remainingChars = 0;
- state->internalState |= HeaderDone;
+ while (uc < end) {
+ ch = *uc++;
+ if (Q_LIKELY(!ch.isSurrogate())) {
+ ucs4 = ch.unicode();
+ } else if (Q_LIKELY(ch.isHighSurrogate())) {
+decode_surrogate:
+ if (uc == end) {
+ if (state->flags & QStringConverter::Flag::Stateless) {
+ ucs4 = state->flags & QStringConverter::Flag::ConvertInvalidToNull ? 0 : QChar::ReplacementCharacter;
+ } else {
+ state->remainingChars = 1;
+ state->state_data[Data] = ch.unicode();
+ return out;
+ }
+ } else if (uc->isLowSurrogate()) {
+ ucs4 = QChar::surrogateToUcs4(ch, *uc++);
+ } else {
+ ucs4 = state->flags & QStringConverter::Flag::ConvertInvalidToNull ? 0 : QChar::ReplacementCharacter;
+ }
+ } else {
+ ucs4 = state->flags & QStringConverter::Flag::ConvertInvalidToNull ? 0 : QChar::ReplacementCharacter;
+ }
+ if (endian == BigEndianness)
+ qToBigEndian(ucs4, out);
+ else
+ qToLittleEndian(ucs4, out);
+ out += 4;
}
- return d;
+
+ return out;
}
QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
@@ -1299,9 +1329,7 @@ static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringCon
static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, DetectEndianness);
}
static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
@@ -1313,9 +1341,7 @@ static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringC
static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state, BigEndianness);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, BigEndianness);
}
static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
@@ -1327,9 +1353,7 @@ static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringC
static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
{
- QByteArray s = QUtf32::convertFromUnicode(in.data(), in.length(), state, LittleEndianness);
- memcpy(out, s.constData(), s.length());
- return out + s.length();
+ return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, LittleEndianness);
}
void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 4768677a25..a579a362ff 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -314,6 +314,7 @@ struct QUtf32
{
static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
+ static char *convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian);
};
struct QLocal8Bit