summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2022-05-31 11:58:50 +0200
committerQt Cherry-pick Bot <cherrypick_bot@qt-project.org>2022-06-25 15:26:08 +0000
commit1197280748b017cbf0c6234c7cb226eca73c00eb (patch)
treed4d16080891cb9263afd766d17d977436f31728a /src
parent24e7671e2014a9ec4b99a5abebeed0f0fb75bf48 (diff)
Add QStringDecoder::decoderForHtml()
Now that QStringConverter can handle non-UTF encodings through ICU, add a way to get a decoder for arbitrary HTML code. As opposed to QStringConverter::encodingForHtml(), this method will also try to create a valid string decoder for non-Unicode codecs. Change-Id: I343584da1b114396c744f482d9b433c9cedcc511 Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io> Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> (cherry picked from commit 9c1f3b6d4d5a5fe59396062c6f68cc1201665c62) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/kernel/qmimedata.cpp7
-rw-r--r--src/corelib/text/qstringconverter.cpp64
-rw-r--r--src/corelib/text/qstringconverter.h3
-rw-r--r--src/widgets/widgets/qtextbrowser.cpp9
4 files changed, 60 insertions, 23 deletions
diff --git a/src/corelib/kernel/qmimedata.cpp b/src/corelib/kernel/qmimedata.cpp
index 661a30b167..02a86bff37 100644
--- a/src/corelib/kernel/qmimedata.cpp
+++ b/src/corelib/kernel/qmimedata.cpp
@@ -127,10 +127,9 @@ QVariant QMimeDataPrivate::retrieveTypedData(const QString &format, QMetaType ty
if (ba.isNull())
return QVariant();
if (format == "text/html"_L1) {
- auto encoding = QStringConverter::encodingForHtml(ba);
- if (encoding) {
- QStringDecoder toUtf16(*encoding);
- return QString(toUtf16(ba));
+ QStringDecoder decoder = QStringDecoder::decoderForHtml(ba);
+ if (decoder.isValid()) {
+ return QString(decoder(ba));
}
// fall back to utf8
}
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index ba13194310..38a434f353 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -2049,20 +2049,8 @@ QStringConverter::encodingForData(QByteArrayView data, char16_t expectedFirstCha
return std::nullopt;
}
-/*!
- Tries to determine the encoding of the HTML in \a data by looking at leading byte
- order marks or a charset specifier in the HTML meta tag. If the optional is empty,
- the encoding specified is not supported by QStringConverter. If no encoding is
- detected, the method returns Utf8.
-*/
-std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
+static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
{
- // determine charset
- auto encoding = encodingForData(data);
- if (encoding)
- // trust the initial BOM
- return encoding;
-
static constexpr auto metaSearcher = qMakeStaticByteArrayMatcher("meta ");
static constexpr auto charsetSearcher = qMakeStaticByteArrayMatcher("charset=");
@@ -2089,15 +2077,63 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByt
if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
name = QByteArrayLiteral("UTF-8");
if (!name.isEmpty())
- return encodingForName(name);
+ return name;
}
}
}
}
+ return QByteArray();
+}
+
+/*!
+ Tries to determine the encoding of the HTML in \a data by looking at leading byte
+ order marks or a charset specifier in the HTML meta tag. If the optional is empty,
+ the encoding specified is not supported by QStringConverter. If no encoding is
+ detected, the method returns Utf8.
+
+ \sa QStringDecoder::decoderForHtml()
+*/
+std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
+{
+ // determine charset
+ std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
+ if (encoding)
+ // trust the initial BOM
+ return encoding;
+
+ QByteArray encodingTag = parseHtmlMetaForEncoding(data);
+ if (!encodingTag.isEmpty())
+ return encodingForName(encodingTag);
+
return Utf8;
}
/*!
+ Tries to determine the encoding of the HTML in \a data by looking at leading byte
+ order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
+ matching the encoding. If the returned decoder is not valid,
+ the encoding specified is not supported by QStringConverter. If no encoding is
+ detected, the method returns a decoder for Utf8.
+
+ \sa isValid()
+*/
+QStringDecoder QStringDecoder::decoderForHtml(QByteArrayView data)
+{
+ // determine charset
+ std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
+ if (encoding)
+ // trust the initial BOM
+ return QStringDecoder(encoding.value());
+
+ QByteArray encodingTag = parseHtmlMetaForEncoding(data);
+ if (!encodingTag.isEmpty())
+ return QStringDecoder(encodingTag);
+
+ return QStringDecoder(Utf8);
+}
+
+
+/*!
Returns the canonical name for encoding \a e.
*/
const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
diff --git a/src/corelib/text/qstringconverter.h b/src/corelib/text/qstringconverter.h
index c1645f0938..6664beccb3 100644
--- a/src/corelib/text/qstringconverter.h
+++ b/src/corelib/text/qstringconverter.h
@@ -140,6 +140,9 @@ public:
}
return iface->toUtf16(out, ba, &state);
}
+
+ Q_CORE_EXPORT static QStringDecoder decoderForHtml(QByteArrayView data);
+
private:
QString decodeAsString(QByteArrayView in)
{
diff --git a/src/widgets/widgets/qtextbrowser.cpp b/src/widgets/widgets/qtextbrowser.cpp
index 0273331c40..2bab2efc61 100644
--- a/src/widgets/widgets/qtextbrowser.cpp
+++ b/src/widgets/widgets/qtextbrowser.cpp
@@ -288,12 +288,11 @@ void QTextBrowserPrivate::setSource(const QUrl &url, QTextDocument::ResourceType
} else if (data.userType() == QMetaType::QByteArray) {
QByteArray ba = data.toByteArray();
if (type == QTextDocument::HtmlResource) {
- auto encoding = QStringConverter::encodingForHtml(ba);
- if (!encoding)
+ auto decoder = QStringDecoder::decoderForHtml(ba);
+ if (!decoder.isValid())
// fall back to utf8
- encoding = QStringDecoder::Utf8;
- QStringDecoder toUtf16(*encoding);
- txt = toUtf16(ba);
+ decoder = QStringDecoder(QStringDecoder::Utf8);
+ txt = decoder(ba);
} else {
txt = QString::fromUtf8(ba);
}