diff options
-rw-r--r-- | src/corelib/io/qurlidna.cpp | 45 | ||||
-rw-r--r-- | tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp | 25 |
2 files changed, 61 insertions, 9 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp index 8dd01e0723..226bc8ba28 100644 --- a/src/corelib/io/qurlidna.cpp +++ b/src/corelib/io/qurlidna.cpp @@ -2120,31 +2120,52 @@ Q_AUTOTEST_EXPORT void qt_nameprep(QString *source, int from) } } -Q_AUTOTEST_EXPORT bool qt_check_std3rules(const QChar *uc, int len) +static const QChar *qt_find_nonstd3(const QChar *uc, int len, Qt::CaseSensitivity cs) { if (len > 63) - return false; + return uc; for (int i = 0; i < len; ++i) { ushort c = uc[i].unicode(); if (c == '-' && (i == 0 || i == len - 1)) - return false; + return uc + i; // verifying the absence of non-LDH is the same as verifying that // only LDH is present + if (cs == Qt::CaseInsensitive && (c >= 'A' && c <= 'Z')) + continue; if (c == '-' || (c >= '0' && c <= '9') - || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') //underscore is not supposed to be allowed, but other browser accept it (QTBUG-7434) || c == '_') continue; - return false; + return uc + i; } - return true; + return nullptr; +} + +Q_AUTOTEST_EXPORT bool qt_check_std3rules(const QChar *uc, int len) +{ + return qt_find_nonstd3(uc, len, Qt::CaseInsensitive) == nullptr; } +static bool qt_check_nameprepped_std3(const QChar *in, int len) +{ + // fast path: check for lowercase ASCII + const QChar *firstNonAscii = qt_find_nonstd3(in, len, Qt::CaseSensitive); + if (firstNonAscii == nullptr) { + // everything was lowercase ASCII, digits or hyphen + return true; + } + + const QChar *e = in + len; + QString origin = QString::fromRawData(firstNonAscii, e - firstNonAscii); + QString copy = origin; + qt_nameprep(&copy, 0); + return origin == copy; +} static inline uint encodeDigit(uint digit) { @@ -2546,13 +2567,19 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot) qt_punycodeEncoder(result.constData() + prevLen, result.size() - prevLen, &aceForm); // We use resize()+memcpy() here because we're overwriting the data we've copied + bool appended = false; if 
(isIdnEnabled) { QString tmp = qt_punycodeDecoder(aceForm); if (tmp.isEmpty()) return QString(); // shouldn't happen, since we've just punycode-encoded it - result.resize(prevLen + tmp.size()); - memcpy(result.data() + prevLen, tmp.constData(), tmp.size() * sizeof(QChar)); - } else { + if (qt_check_nameprepped_std3(tmp.constData(), tmp.size())) { + result.resize(prevLen + tmp.size()); + memcpy(result.data() + prevLen, tmp.constData(), tmp.size() * sizeof(QChar)); + appended = true; + } + } + + if (!appended) { result.resize(prevLen + aceForm.size()); memcpy(result.data() + prevLen, aceForm.constData(), aceForm.size() * sizeof(QChar)); } diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp index 766338e4f8..bcf6d6c32b 100644 --- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp +++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp @@ -650,6 +650,31 @@ void tst_QUrlInternal::ace_testsuite_data() << "xn--djrptm67aikb.xn--kpry57d" << "." 
<< taiwaneseIDN; + + // violations / invalids + QTest::newRow("invalid-punycode") << "xn--z" << "xn--z" << "xn--z" << "xn--z"; + + // U+00A0 NO-BREAK SPACE encodes to Punycode "6a" + // but it is prohibited and should have caused encoding failure + QTest::newRow("invalid-nameprep-prohibited") << "xn--6a" << "xn--6a" << "xn--6a" << "xn--6a"; + + // U+00AD SOFT HYPHEN between "a" and "b" encodes to Punycode "ab-5da" + // but it should have been removed in the nameprep stage + QTest::newRow("invalid-nameprep-maptonothing") << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da"; + + // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE encodes to Punycode "4ba" + // but it should have nameprepped to lowercase first + QTest::newRow("invalid-nameprep-uppercase") << "xn--4ba" << "xn--4ba" << "xn--4ba" << "xn--4ba"; + + // U+00B5 MICRO SIGN encodes to Punycode "sba" + // but it should have nameprepped to NFKC U+03BC GREEK SMALL LETTER MU + QTest::newRow("invalid-nameprep-nonnfkc") << "xn--sba" << "xn--sba" << "xn--sba" << "xn--sba"; + + // U+04CF CYRILLIC SMALL LETTER PALOCHKA encodes to "s5a" + // but it's not in RFC 3454's allowed character list (Unicode 3.2) + QTest::newRow("invalid-nameprep-unassigned") << "xn--s5a" << "xn--s5a" << "xn--s5a" << "xn--s5a"; + // same character, see QTBUG-60364 + QTest::newRow("invalid-nameprep-unassigned2") << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e"; } void tst_QUrlInternal::ace_testsuite() |