summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qstringconverter.cpp
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@qt.io>2022-10-31 17:27:15 +0100
committerMarc Mutz <marc.mutz@qt.io>2022-11-02 17:21:28 +0100
commit8acec4dbe6f9faac2c48a8be67f73e0d2ec1185b (patch)
treee38fbf542db6b56993fb8017cef34d123f3ac133 /src/corelib/text/qstringconverter.cpp
parent3834fee3d33fde303c0a1535c1ef0439aa05c6b0 (diff)
Long live QUtf8::convertFromLatin1()!
With the introduction of QAnyStringView, overloading based on UTF-8 and Latin-1 is becoming more common. Often, the two overloads can share the processing backend, because we're only interested in the US-ASCII subset of each. But if they can't, we need a faster way to convert L1 into UTF-8 than going via UTF-16. This is where the new private API comes in. Eventually, we should have the converse operation, too, to complete the set of direct conversions between the possible three QAnyStringView encodings L1/U8/U16, but this direction is easier to code (there are no error cases) and more immediately useful, so provide L1->U8 alone for now. Change-Id: I3f7e1a9c89979d0eb604cb9e42dedf3d514fca2c Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/text/qstringconverter.cpp')
-rw-r--r--src/corelib/text/qstringconverter.cpp15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 329e1990da..a0e0173cbb 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -571,6 +571,21 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
return reinterpret_cast<char *>(cursor);
}
+char *QUtf8::convertFromLatin1(char *out, QLatin1StringView in)
+{
+    // Writes \a in, re-encoded as UTF-8, starting at \a out (at most two bytes per input character) and returns the new end. ### SIMD-optimize:
+    for (const uchar latin1 : in) {
+        if (latin1 < 0x80) { // US-ASCII is emitted as the same single byte
+            *out++ = latin1;
+            continue;
+        }
+        // U+0080..U+00FF take the two-byte form 110xxxxx 10xxxxxx, as per https://en.wikipedia.org/wiki/UTF-8#Encoding, 2nd row
+        *out++ = 0xC0u | (latin1 >> 6); // lead byte carries the top two bits
+        *out++ = 0x80u | (latin1 & 0x3F); // continuation byte carries the low six bits
+    }
+    return out;
+}
+
QString QUtf8::convertToUnicode(QByteArrayView in)
{
// UTF-8 to UTF-16 always needs the exact same number of words or less: