From 86115848b55faa747adf8bb39a213b3cec7673c4 Mon Sep 17 00:00:00 2001 From: Mitch Curtis Date: Thu, 7 Feb 2013 10:24:01 +0100 Subject: Correctly detect HTML 5 charset attribute in QTextCodec::codecForHtml() QTextCodec::codecForHtml currently fails to detect the charset for this HTML: Test This patch makes the detection of charsets more flexible, allowing for the use of the HTML 5 charset attribute as well as more terminator characters ("'", and ">"). I also added a *_data function for the unit tests. Task-number: QTBUG-5451 Change-Id: I69fe4a04582f0d845cbbe9140a86a950fb7dc861 Reviewed-by: Olivier Goffart Reviewed-by: Denis Dzyubenko --- .../corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 75 +++++++++++++++++++--- 1 file changed, 67 insertions(+), 8 deletions(-) (limited to 'tests/auto/corelib/codecs') diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 6c566e017b..b5f736cfbb 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -84,6 +84,7 @@ private slots: void utfHeaders_data(); void utfHeaders(); + void codecForHtml_data(); void codecForHtml(); void codecForUtfText_data(); @@ -1853,23 +1854,81 @@ void tst_QTextCodec::utfHeaders() } } -void tst_QTextCodec::codecForHtml() +void tst_QTextCodec::codecForHtml_data() { - QByteArray html("blah"); + QTest::addColumn("html"); + QTest::addColumn("defaultCodecMib"); + QTest::addColumn("expectedMibEnum"); - QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1 + int noDefault = -1; + int fallback = 4; // latin 1 + QByteArray html = "blah"; + QTest::newRow("no charset, latin 1") << html << noDefault << fallback; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8 + QTest::newRow("no charset, default UTF-8") << html << 106 << 106; html = ""; - QCOMPARE(QTextCodec::codecForHtml(html, 
QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15 + QTest::newRow("latin 15, default UTF-8") << html << 106 << 111; html = ""; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15 + QTest::newRow("latin 15, default UTF-8 (#2)") << html << 106 << 111; + + html = "Test"; + QTest::newRow("UTF-8, no default") << html << noDefault << 106; + + html = "Test"; + QTest::newRow("latin 1, no default") << html << noDefault << 4; + + html = "Test"; + QTest::newRow("UTF-8, no default (#2)") << html << noDefault << 106; + + html = ""; + QTest::newRow("UTF-8, no quotes") << html << noDefault << 106; + + html = ""; + QTest::newRow("UTF-8, single quotes") << html << noDefault << 106; + + html = "Test"; + QTest::newRow("UTF-8, > terminator") << html << noDefault << 106; + + html = "Test"; + QTest::newRow("UTF-8, > terminator with spaces") << html << noDefault << 106; + + html = "Test"; + QTest::newRow("UTF-8, > teminator with early backslash)") << html << noDefault << 106; + // Test invalid charsets. html = ""; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8 - QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1 + QTest::newRow("invalid charset, no default") << html << noDefault << fallback; + QTest::newRow("invalid charset, default UTF-8") << html << 106 << 106; + + html = "Test"; + QTest::newRow("invalid charset, early terminator (')") << html << noDefault << fallback; +} + +void tst_QTextCodec::codecForHtml() +{ + QFETCH(QByteArray, html); + QFETCH(int, defaultCodecMib); + QFETCH(int, expectedMibEnum); + + if (defaultCodecMib != -1) + QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(defaultCodecMib))->mibEnum(), expectedMibEnum); + else // Test one parameter version when there is no default codec. + QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), expectedMibEnum); } void tst_QTextCodec::codecForUtfText_data() -- cgit v1.2.3