diff options
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- | src/corelib/io/qurlidna.cpp | 112 |
1 files changed, 73 insertions, 39 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp index 04b9a25886..a2a81c7605 100644 --- a/src/corelib/io/qurlidna.cpp +++ b/src/corelib/io/qurlidna.cpp @@ -73,11 +73,11 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // Do not try to encode strings that certainly will result in output // that is longer than allowable domain name label length. Note that // non-BMP codepoints are encoded as two QChars. - if (in.length() > MaxDomainLabelLength * 2) + if (in.size() > MaxDomainLabelLength * 2) return; - int outLen = output->length(); - output->resize(outLen + in.length()); + int outLen = output->size(); + output->resize(outLen + in.size()); QChar *d = output->data() + outLen; bool skipped = false; @@ -132,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // delta = delta + (m - n) * (h + 1), fail on overflow uint tmp; - if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) { + if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -144,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // increase delta until we reach the character processed in this iteration; // fail if delta overflows. if (c < n) { - if (add_overflow<uint>(delta, 1, &delta)) { + if (qAddOverflow<uint>(delta, 1, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -177,7 +177,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) // Do not try to decode strings longer than allowable for a domain label. // Non-ASCII strings are not allowed here anyway, so there is no need // to account for surrogates. - if (pc.length() > MaxDomainLabelLength) + if (pc.size() > MaxDomainLabelLength) return QString(); // strip any ACE prefix @@ -219,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) // i = i + digit * w, fail on overflow uint tmp; - if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i)) + if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i)) return QString(); // detect threshold to stop reading delta digits @@ -231,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) if (digit < t) break; // w = w * (base - t), fail on overflow - if (mul_overflow<uint>(w, base - t, &w)) + if (qMulOverflow<uint>(w, base - t, &w)) return QString(); } @@ -241,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) bias = adapt(i - oldi, outputLength + 1, oldi == 0); // n = n + i div (length(output) + 1), fail on overflow - if (add_overflow<uint>(n, i / (outputLength + 1), &n)) + if (qAddOverflow<uint>(n, i / (outputLength + 1), &n)) return QString(); // allow the deltas to wrap around @@ -320,19 +320,19 @@ Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr; static bool lessThan(const QChar *a, int l, const char *c) { - const ushort *uc = (const ushort *)a; - const ushort *e = uc + l; + const auto *uc = reinterpret_cast<const char16_t *>(a); + const char16_t *e = uc + l; if (!c || *c == 0) return false; while (*c) { - if (uc == e || *uc != *c) + if (uc == e || *uc != static_cast<unsigned char>(*c)) break; ++uc; ++c; } - return (uc == e ? *c : *uc < *c); + return uc == e ? *c : (*uc < static_cast<unsigned char>(*c)); } static bool equal(const QChar *a, int l, const char *b) @@ -423,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio if (uc >= U'A' && uc <= U'Z') uc |= 0x20; // lower-case it - if (!isValidInNormalizedAsciiName(uc)) - return {}; + if (isValidInNormalizedAsciiName(uc)) { + result.append(static_cast<char16_t>(uc)); + continue; + } + } + + allAscii = false; - result.append(static_cast<char16_t>(uc)); + // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1) + if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) { + result.append(u"ss"_s); continue; } - allAscii = false; QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc); @@ -442,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio case QUnicodeTables::IdnaStatus::Ignored: continue; case QUnicodeTables::IdnaStatus::Valid: + case QUnicodeTables::IdnaStatus::Disallowed: for (auto c : QChar::fromUcs4(uc)) result.append(c); break; case QUnicodeTables::IdnaStatus::Mapped: result.append(QUnicodeTables::idnaMapping(uc)); break; - case QUnicodeTables::IdnaStatus::Disallowed: - return {}; default: Q_UNREACHABLE(); } @@ -468,7 +473,7 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio */ static bool validateAsciiLabel(QStringView label) { - if (label.length() > MaxDomainLabelLength) + if (label.size() > MaxDomainLabelLength) return false; if (label.first() == u'-' || label.last() == u'-') @@ -483,12 +488,13 @@ class DomainValidityChecker { bool domainNameIsBidi = false; bool hadBidiErrors = false; + bool ignoreBidiErrors; static constexpr char32_t ZWNJ = U'\u200C'; static constexpr char32_t ZWJ = U'\u200D'; public: - DomainValidityChecker() { } + DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { } bool checkLabel(const QString &label, QUrl::AceProcessingOptions options); private: @@ -709,12 +715,12 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing if (label != label.normalized(QString::NormalizationForm_C)) return false; - if (label.length() >= 4) { + if (label.size() >= 4) { // This assumes that the first two characters are in BMP, but that's ok // because non-BMP characters are unlikely to be used for specifying // future extensions. if (label[2] == u'-' && label[3] == u'-') - return false; + return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label); } if (label.startsWith(u'-') || label.endsWith(u'-')) @@ -736,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing for (;;) { hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ; - if (!domainNameIsBidi) { + if (!ignoreBidiErrors && !domainNameIsBidi) { switch (QChar::direction(c)) { case QChar::DirR: case QChar::DirAL: @@ -777,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing return true; } -static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot) +static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot) { qsizetype lastIdx = 0; QString aceForm; // this variable is here for caching QString aceResult; while (true) { - auto idx = normalizedDomain.indexOf(u'.', lastIdx); + qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx); if (idx == -1) idx = normalizedDomain.size(); - const auto labelLength = idx - lastIdx; - if (labelLength == 0) { - if (idx == normalizedDomain.size()) - break; - if (dot == ForbidLeadingDot || idx > 0) - return {}; // two delimiters in a row -- empty label not allowed - } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = normalizedDomain.sliced(lastIdx, labelLength); aceForm.clear(); qt_punycodeEncoder(label, &aceForm); if (aceForm.isEmpty()) @@ -807,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot if (idx == normalizedDomain.size()) break; + if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0)) + return {}; // two delimiters in a row -- empty label not allowed + lastIdx = idx + 1; aceResult += u'.'; } @@ -814,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot return aceResult; } -static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot, +static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot, bool *usesPunycode) { qsizetype lastIdx = 0; @@ -833,7 +837,7 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot if (dot == ForbidLeadingDot || idx > 0) return false; // two delimiters in a row -- empty label not allowed } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const auto label = normalizedDomain.sliced(lastIdx, labelLength); if (!validateAsciiLabel(label)) return false; @@ -886,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO return result; } +static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options) +{ + qsizetype lastIdx = 0; + + DomainValidityChecker checker(true); + + while (true) { + qsizetype idx = domainName.indexOf(u'.', lastIdx); + if (idx == -1) + idx = domainName.size(); + + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = domainName.sliced(lastIdx, labelLength); + + if (!checker.checkLabel(label, options)) + return false; + } + + if (idx == domainName.size()) + break; + + lastIdx = idx + 1; + } + return true; +} + QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options) { @@ -900,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, if (normalized.isEmpty()) return {}; - bool needsCoversionToUnicode; + if (!mappedToAscii && !checkUnicodeName(normalized, options)) + return {}; + + bool needsConversionToUnicode; const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot); - if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode)) + if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode)) return {}; - if (op == ToAceOnly || !needsCoversionToUnicode + if (op == ToAceOnly || !needsConversionToUnicode || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) { return aceResult; } |