diff options
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- | src/corelib/io/qurlidna.cpp | 100 |
1 files changed, 67 insertions, 33 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp index 5fc6006ef4..a2a81c7605 100644 --- a/src/corelib/io/qurlidna.cpp +++ b/src/corelib/io/qurlidna.cpp @@ -132,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // delta = delta + (m - n) * (h + 1), fail on overflow uint tmp; - if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) { + if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -144,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // increase delta until we reach the character processed in this iteration; // fail if delta overflows. if (c < n) { - if (add_overflow<uint>(delta, 1, &delta)) { + if (qAddOverflow<uint>(delta, 1, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -219,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) // i = i + digit * w, fail on overflow uint tmp; - if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i)) + if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i)) return QString(); // detect threshold to stop reading delta digits @@ -231,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) if (digit < t) break; // w = w * (base - t), fail on overflow - if (mul_overflow<uint>(w, base - t, &w)) + if (qMulOverflow<uint>(w, base - t, &w)) return QString(); } @@ -241,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) bias = adapt(i - oldi, outputLength + 1, oldi == 0); // n = n + i div (length(output) + 1), fail on overflow - if (add_overflow<uint>(n, i / (outputLength + 1), &n)) + if (qAddOverflow<uint>(n, i / (outputLength + 1), &n)) return QString(); // allow the deltas to wrap around @@ -320,19 +320,19 @@ Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr; static bool lessThan(const QChar *a, int l, const char *c) { - const ushort *uc = (const ushort *)a; - const ushort *e = uc + l; + const auto *uc = reinterpret_cast<const char16_t *>(a); + const char16_t *e = uc + l; if (!c || *c == 0) return false; while (*c) { - if (uc == e || *uc != *c) + if (uc == e || *uc != static_cast<unsigned char>(*c)) break; ++uc; ++c; } - return (uc == e ? *c : *uc < *c); + return uc == e ? *c : (*uc < static_cast<unsigned char>(*c)); } static bool equal(const QChar *a, int l, const char *b) @@ -423,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio if (uc >= U'A' && uc <= U'Z') uc |= 0x20; // lower-case it - if (!isValidInNormalizedAsciiName(uc)) - return {}; + if (isValidInNormalizedAsciiName(uc)) { + result.append(static_cast<char16_t>(uc)); + continue; + } + } + + allAscii = false; - result.append(static_cast<char16_t>(uc)); + // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1) + if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) { + result.append(u"ss"_s); continue; } - allAscii = false; QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc); @@ -442,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio case QUnicodeTables::IdnaStatus::Ignored: continue; case QUnicodeTables::IdnaStatus::Valid: + case QUnicodeTables::IdnaStatus::Disallowed: for (auto c : QChar::fromUcs4(uc)) result.append(c); break; case QUnicodeTables::IdnaStatus::Mapped: result.append(QUnicodeTables::idnaMapping(uc)); break; - case QUnicodeTables::IdnaStatus::Disallowed: - return {}; default: Q_UNREACHABLE(); } @@ -483,12 +488,13 @@ class DomainValidityChecker { bool domainNameIsBidi = false; bool hadBidiErrors = false; + bool ignoreBidiErrors; static constexpr char32_t ZWNJ = U'\u200C'; static constexpr char32_t ZWJ = U'\u200D'; public: - DomainValidityChecker() { } + DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { } bool checkLabel(const QString &label, QUrl::AceProcessingOptions options); private: @@ -714,7 +720,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing // because non-BMP characters are unlikely to be used for specifying // future extensions. if (label[2] == u'-' && label[3] == u'-') - return false; + return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label); } if (label.startsWith(u'-') || label.endsWith(u'-')) @@ -736,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing for (;;) { hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ; - if (!domainNameIsBidi) { + if (!ignoreBidiErrors && !domainNameIsBidi) { switch (QChar::direction(c)) { case QChar::DirR: case QChar::DirAL: @@ -777,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing return true; } -static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot) +static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot) { qsizetype lastIdx = 0; QString aceForm; // this variable is here for caching QString aceResult; while (true) { - auto idx = normalizedDomain.indexOf(u'.', lastIdx); + qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx); if (idx == -1) idx = normalizedDomain.size(); - const auto labelLength = idx - lastIdx; - if (labelLength == 0) { - if (idx == normalizedDomain.size()) - break; - if (dot == ForbidLeadingDot || idx > 0) - return {}; // two delimiters in a row -- empty label not allowed - } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = normalizedDomain.sliced(lastIdx, labelLength); aceForm.clear(); qt_punycodeEncoder(label, &aceForm); if (aceForm.isEmpty()) @@ -807,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot if (idx == normalizedDomain.size()) break; + if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0)) + return {}; // two delimiters in a row -- empty label not allowed + lastIdx = idx + 1; aceResult += u'.'; } @@ -814,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot return aceResult; } -static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot, +static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot, bool *usesPunycode) { qsizetype lastIdx = 0; @@ -833,7 +837,7 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot if (dot == ForbidLeadingDot || idx > 0) return false; // two delimiters in a row -- empty label not allowed } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const auto label = normalizedDomain.sliced(lastIdx, labelLength); if (!validateAsciiLabel(label)) return false; @@ -886,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO return result; } +static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options) +{ + qsizetype lastIdx = 0; + + DomainValidityChecker checker(true); + + while (true) { + qsizetype idx = domainName.indexOf(u'.', lastIdx); + if (idx == -1) + idx = domainName.size(); + + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = domainName.sliced(lastIdx, labelLength); + + if (!checker.checkLabel(label, options)) + return false; + } + + if (idx == domainName.size()) + break; + + lastIdx = idx + 1; + } + return true; +} + QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options) { @@ -900,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, if (normalized.isEmpty()) return {}; - bool needsCoversionToUnicode; + if (!mappedToAscii && !checkUnicodeName(normalized, options)) + return {}; + + bool needsConversionToUnicode; const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot); - if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode)) + if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode)) return {}; - if (op == ToAceOnly || !needsCoversionToUnicode + if (op == ToAceOnly || !needsConversionToUnicode || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) { return aceResult; } |