From 7e1a0c07390ed18de074ce5450b272838a3b8ac8 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sun, 16 Sep 2018 09:05:47 -0700 Subject: Add tests for decoding too-short UTF-8 sequences We were handling these properly, but not testing them. I guess we weren't testing because the condition is a valid intermediate state, so hasFailure() is correct in returning false. Testing inspired by the bug reported in https://github.com/intel/tinycbor/issues/137 Change-Id: Ib47c56818178458a88b4fffd1554ecfdd0af637e Reviewed-by: Lars Knoll --- tests/auto/corelib/codecs/utf8/tst_utf8.cpp | 15 ++++++++++++--- tests/auto/corelib/codecs/utf8/utf8data.cpp | 30 +++++++++++++++++++---------- 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'tests/auto') diff --git a/tests/auto/corelib/codecs/utf8/tst_utf8.cpp b/tests/auto/corelib/codecs/utf8/tst_utf8.cpp index 8f78aa937c..9ce1748e72 100644 --- a/tests/auto/corelib/codecs/utf8/tst_utf8.cpp +++ b/tests/auto/corelib/codecs/utf8/tst_utf8.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the test suite of the Qt Toolkit. @@ -71,7 +71,7 @@ void tst_Utf8::initTestCase() // is the locale UTF-8? 
if (QString(QChar(QChar::ReplacementCharacter)).toLocal8Bit() == "\xEF\xBF\xBD") { QTest::newRow("localecodec") << true; - qDebug() << "locale is utf8"; + qInfo() << "locale is utf8"; } } @@ -226,6 +226,15 @@ void tst_Utf8::invalidUtf8() // The system's UTF-8 codec is sometimes buggy // GNU libc's iconv is known to accept U+FFFF and U+FFFE encoded as UTF-8 // OS X's iconv is known to accept those, plus surrogates and codepoints above U+10FFFF + if (!useLocale) + QVERIFY(decoder->hasFailure() || decoder->needsMoreData()); + else if (!decoder->hasFailure() && !decoder->needsMoreData()) + qWarning("System codec does not report failure when it should. Should report bug upstream."); + + // add a continuation character and test that we don't accidentally use it + // (buffer overrun) + utf8 += char(0x80 | 0x3f); + decoder->toUnicode(utf8.constData(), utf8.size() - 1); if (!useLocale) QVERIFY(decoder->hasFailure()); else if (!decoder->hasFailure()) diff --git a/tests/auto/corelib/codecs/utf8/utf8data.cpp b/tests/auto/corelib/codecs/utf8/utf8data.cpp index 2267dc8514..221e1d5579 100644 --- a/tests/auto/corelib/codecs/utf8/utf8data.cpp +++ b/tests/auto/corelib/codecs/utf8/utf8data.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the test suite of the Qt Toolkit. 
@@ -29,15 +30,24 @@ void loadInvalidUtf8Rows() { - QTest::newRow("1char") << QByteArray("\x80"); - QTest::newRow("2chars-1") << QByteArray("\xC2\xC0"); - QTest::newRow("2chars-2") << QByteArray("\xC3\xDF"); - QTest::newRow("2chars-3") << QByteArray("\xC7\xF0"); - QTest::newRow("3chars-1") << QByteArray("\xE0\xA0\xC0"); - QTest::newRow("3chars-2") << QByteArray("\xE0\xC0\xA0"); - QTest::newRow("4chars-1") << QByteArray("\xF0\x90\x80\xC0"); - QTest::newRow("4chars-2") << QByteArray("\xF0\x90\xC0\x80"); - QTest::newRow("4chars-3") << QByteArray("\xF0\xC0\x80\x80"); + // Wrong continuations + QTest::newRow("bad-continuation-1char") << QByteArray("\x80"); + QTest::newRow("bad-continuation-2chars-1") << QByteArray("\xC2\xC0"); + QTest::newRow("bad-continuation-2chars-2") << QByteArray("\xC3\xDF"); + QTest::newRow("bad-continuation-2chars-3") << QByteArray("\xC7\xF0"); + QTest::newRow("bad-continuation-3chars-1") << QByteArray("\xE0\xA0\xC0"); + QTest::newRow("bad-continuation-3chars-2") << QByteArray("\xE0\xC0\xA0"); + QTest::newRow("bad-continuation-4chars-1") << QByteArray("\xF0\x90\x80\xC0"); + QTest::newRow("bad-continuation-4chars-2") << QByteArray("\xF0\x90\xC0\x80"); + QTest::newRow("bad-continuation-4chars-3") << QByteArray("\xF0\xC0\x80\x80"); + + // Too short + QTest::newRow("too-short-2chars") << QByteArray("\xC2"); + QTest::newRow("too-short-3chars-1") << QByteArray("\xE0"); + QTest::newRow("too-short-3chars-2") << QByteArray("\xE0\xA0"); + QTest::newRow("too-short-4chars-1") << QByteArray("\xF0"); + QTest::newRow("too-short-4chars-2") << QByteArray("\xF0\x90"); + QTest::newRow("too-short-4chars-3") << QByteArray("\xF0\x90\x80"); // Surrogate pairs must now be present either // U+D800: 1101 10 0000 00 0000 -- cgit v1.2.3