diff options
author | Mitch Curtis <mitch.curtis@digia.com> | 2013-02-07 10:24:01 +0100 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-02-12 01:31:26 +0100 |
commit | 86115848b55faa747adf8bb39a213b3cec7673c4 (patch) | |
tree | c6a49f6eba56f3eb2986e9f729362084948c5a78 /tests | |
parent | 7abc1a6a828eb36d26901d4ea5ecf8b4514f3cd7 (diff) |
Correctly detect HTML 5 charset attribute in QTextCodec::codecForHtml()
QTextCodec::codecForHtml currently fails to detect the charset for this
HTML:
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=9,chrome=1">
<title>Test</title>
</head>
This patch makes the detection of charsets more flexible, allowing for
the use of the HTML 5 charset attribute as well more terminator characters
("'", and ">").
I also added a *_data function for the unit tests.
Task-number: QTBUG-5451
Change-Id: I69fe4a04582f0d845cbbe9140a86a950fb7dc861
Reviewed-by: Olivier Goffart <ogoffart@woboq.com>
Reviewed-by: Denis Dzyubenko <denis@ddenis.info>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 75 |
1 files changed, 67 insertions, 8 deletions
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 6c566e017b..b5f736cfbb 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -84,6 +84,7 @@ private slots: void utfHeaders_data(); void utfHeaders(); + void codecForHtml_data(); void codecForHtml(); void codecForUtfText_data(); @@ -1853,23 +1854,81 @@ void tst_QTextCodec::utfHeaders() } } -void tst_QTextCodec::codecForHtml() +void tst_QTextCodec::codecForHtml_data() { - QByteArray html("<html><head></head><body>blah</body></html>"); + QTest::addColumn<QByteArray>("html"); + QTest::addColumn<int>("defaultCodecMib"); + QTest::addColumn<int>("expectedMibEnum"); - QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1 + int noDefault = -1; + int fallback = 4; // latin 1 + QByteArray html = "<html><head></head><body>blah</body></html>"; + QTest::newRow("no charset, latin 1") << html << noDefault << fallback; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8 + QTest::newRow("no charset, default UTF-8") << html << 106 << 106; html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>"; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15 + QTest::newRow("latin 15, default UTF-8") << html << 106 << 111; html = "<html><head><meta content=\"text/html; charset=ISO-8859-15\" http-equiv=\"content-type\" /></head></html>"; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15 + QTest::newRow("latin 15, default UTF-8 (#2)") << html << 106 << 111; + + html = "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>"; + QTest::newRow("UTF-8, no default") << html << noDefault << 106; + + html = "<!DOCTYPE html><html><head><meta charset=\"ISO_8859-1:1987\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>"; + QTest::newRow("latin 1, no default") << html << noDefault << 4; + + html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8\"><title>Test</title></head>"; + QTest::newRow("UTF-8, no default (#2)") << html << noDefault << 106; + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8/></head></html>"; + QTest::newRow("UTF-8, no quotes") << html << noDefault << 106; + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset='UTF-8'/></head></html>"; + QTest::newRow("UTF-8, single quotes") << html << noDefault << 106; + + html = "<!DOCTYPE html><html><head><meta charset=utf-8><title>Test</title></head>"; + QTest::newRow("UTF-8, > terminator") << html << noDefault << 106; + + html = "<!DOCTYPE html><html><head><meta charset= utf-8 ><title>Test</title></head>"; + QTest::newRow("UTF-8, > terminator with spaces") << html << noDefault << 106; + + html = "<!DOCTYPE html><html><head><meta charset= utf/8 ><title>Test</title></head>"; + QTest::newRow("UTF-8, > teminator with early backslash)") << html << noDefault << 106; + // Test invalid charsets. html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>"; - QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8 - QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1 + QTest::newRow("invalid charset, no default") << html << noDefault << fallback; + QTest::newRow("invalid charset, default UTF-8") << html << 106 << 106; + + html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\""; + html.prepend(QByteArray().fill(' ', 512 - html.size())); + QTest::newRow("invalid charset (large header)") << html << noDefault << fallback; + + html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8"; + QTest::newRow("invalid charset (no closing double quote)") << html << noDefault << fallback; + + html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset='utf-8"; + QTest::newRow("invalid charset (no closing single quote)") << html << noDefault << fallback; + + html = "<!DOCTYPE html><html><head><meta charset=utf-8 foo=bar><title>Test</title></head>"; + QTest::newRow("invalid (space terminator)") << html << noDefault << fallback; + + html = "<!DOCTYPE html><html><head><meta charset=\" utf' 8 /><title>Test</title></head>"; + QTest::newRow("invalid charset, early terminator (')") << html << noDefault << fallback; +} + +void tst_QTextCodec::codecForHtml() +{ + QFETCH(QByteArray, html); + QFETCH(int, defaultCodecMib); + QFETCH(int, expectedMibEnum); + + if (defaultCodecMib != -1) + QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(defaultCodecMib))->mibEnum(), expectedMibEnum); + else // Test one parameter version when there is no default codec. + QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), expectedMibEnum); } void tst_QTextCodec::codecForUtfText_data() |