From 1abcc1cd3d1a8d04ccfd711c3293d0c671af3c78 Mon Sep 17 00:00:00 2001 From: Friedemann Kleint Date: Mon, 27 Oct 2014 10:45:47 +0100 Subject: Hardcode UTF-8 for "unicode" in QTextCodec::codecForHtml(). ICU would return a utf-16 (endian dependent) codec for unicode which is very rarely what people want. In most cases, unicode is encoded in utf8 these days, so return a utf8 codec for it. Task-number: QTBUG-41998 Change-Id: I51ee758d520702b263a8b2011787eb1f3455ed96 Reviewed-by: Lars Knoll --- src/corelib/codecs/qtextcodec.cpp | 5 ++++- tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp index 24cb4e7038..9af307ca17 100644 --- a/src/corelib/codecs/qtextcodec.cpp +++ b/src/corelib/codecs/qtextcodec.cpp @@ -1049,7 +1049,10 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCo while (++pos2 < header.size()) { char ch = header.at(pos2); if (ch == '\"' || ch == '\'' || ch == '>') { - c = QTextCodec::codecForName(header.mid(pos, pos2 - pos)); + QByteArray name = header.mid(pos, pos2 - pos); + if (name == "unicode") // QTBUG-41998, ICU will return UTF-16. + name = QByteArrayLiteral("UTF-8"); + c = QTextCodec::codecForName(name); return c ? c : defaultCodec; } } diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 4e3d5c64bc..df2f97ce0e 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -1996,6 +1996,10 @@ void tst_QTextCodec::codecForHtml_data() "auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; display: inline !important; float: " "none;\">ͻ\000"; QTest::newRow("greek text UTF-8") << html << 106 << 106; + + html = "" + "

bla

"; // QTBUG-41998, ICU will return UTF-16. + QTest::newRow("legacy unicode UTF-8") << html << 106 << 106; } void tst_QTextCodec::codecForHtml() -- cgit v1.2.3