diff options
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- | src/corelib/io/qurlidna.cpp | 196 |
1 files changed, 98 insertions, 98 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp index b3e67fecf7..a2a81c7605 100644 --- a/src/corelib/io/qurlidna.cpp +++ b/src/corelib/io/qurlidna.cpp @@ -1,47 +1,12 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// Copyright (C) 2016 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qurl_p.h" #include <QtCore/qstringlist.h> #include <QtCore/private/qnumeric_p.h> +#include <QtCore/private/qoffsetstringarray_p.h> #include <QtCore/private/qstringiterator_p.h> #include <QtCore/private/qunicodetables_p.h> @@ -49,6 +14,8 @@ QT_BEGIN_NAMESPACE +using namespace Qt::StringLiterals; + // needed by the punycode encoder/decoder static const uint base = 36; static const uint tmin = 1; @@ -106,11 +73,11 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // Do not try to encode strings that certainly will result in output // that is longer than allowable domain name label length. Note that // non-BMP codepoints are encoded as two QChars. - if (in.length() > MaxDomainLabelLength * 2) + if (in.size() > MaxDomainLabelLength * 2) return; - int outLen = output->length(); - output->resize(outLen + in.length()); + int outLen = output->size(); + output->resize(outLen + in.size()); QChar *d = output->data() + outLen; bool skipped = false; @@ -136,7 +103,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // if basic code points were copied, add the delimiter character. if (h > 0) - *output += QLatin1Char{'-'}; + *output += u'-'; // compute the input length in Unicode code points. uint inputLength = 0; @@ -165,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // delta = delta + (m - n) * (h + 1), fail on overflow uint tmp; - if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) { + if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -177,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) // increase delta until we reach the character processed in this iteration; // fail if delta overflows. if (c < n) { - if (add_overflow<uint>(delta, 1, &delta)) { + if (qAddOverflow<uint>(delta, 1, &delta)) { output->truncate(outLen); return; // punycode_overflow } @@ -197,7 +164,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output) } // prepend ACE prefix - output->insert(outLen, QLatin1String("xn--")); + output->insert(outLen, "xn--"_L1); return; } @@ -210,17 +177,17 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) // Do not try to decode strings longer than allowable for a domain label. // Non-ASCII strings are not allowed here anyway, so there is no need // to account for surrogates. - if (pc.length() > MaxDomainLabelLength) + if (pc.size() > MaxDomainLabelLength) return QString(); // strip any ACE prefix - int start = pc.startsWith(QLatin1String("xn--")) ? 4 : 0; + int start = pc.startsWith("xn--"_L1) ? 4 : 0; if (!start) return pc; // find the last delimiter character '-' in the input array. copy // all data before this delimiter directly to the output array. - int delimiterPos = pc.lastIndexOf(QLatin1Char{'-'}); + int delimiterPos = pc.lastIndexOf(u'-'); auto output = delimiterPos < 4 ? std::u32string() : pc.mid(start, delimiterPos - start).toStdU32String(); @@ -252,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) // i = i + digit * w, fail on overflow uint tmp; - if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i)) + if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i)) return QString(); // detect threshold to stop reading delta digits @@ -264,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) if (digit < t) break; // w = w * (base - t), fail on overflow - if (mul_overflow<uint>(w, base - t, &w)) + if (qMulOverflow<uint>(w, base - t, &w)) return QString(); } @@ -274,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) bias = adapt(i - oldi, outputLength + 1, oldi == 0); // n = n + i div (length(output) + 1), fail on overflow - if (add_overflow<uint>(n, i / (outputLength + 1), &n)) + if (qAddOverflow<uint>(n, i / (outputLength + 1), &n)) return QString(); // allow the deltas to wrap around @@ -310,7 +277,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc) return QString::fromStdU32String(output); } -static const char * const idn_whitelist[] = { +static constexpr auto idn_whitelist = qOffsetStringArray( "ac", "ar", "asia", "at", "biz", "br", "cat", "ch", "cl", "cn", "com", @@ -347,26 +314,25 @@ static const char * const idn_whitelist[] = { "xn--wgbh1c", // Egypt "xn--wgbl6a", // Qatar "xn--xkc2al3hye2a" // Sri Lanka -}; -static const size_t idn_whitelist_size = sizeof idn_whitelist / sizeof *idn_whitelist; +); -static QStringList *user_idn_whitelist = nullptr; +Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr; static bool lessThan(const QChar *a, int l, const char *c) { - const ushort *uc = (const ushort *)a; - const ushort *e = uc + l; + const auto *uc = reinterpret_cast<const char16_t *>(a); + const char16_t *e = uc + l; if (!c || *c == 0) return false; while (*c) { - if (uc == e || *uc != *c) + if (uc == e || *uc != static_cast<unsigned char>(*c)) break; ++uc; ++c; } - return (uc == e ? *c : *uc < *c); + return uc == e ? *c : (*uc < static_cast<unsigned char>(*c)); } static bool equal(const QChar *a, int l, const char *b) @@ -383,7 +349,7 @@ static bool equal(const QChar *a, int l, const char *b) static bool qt_is_idn_enabled(QStringView aceDomain) { - auto idx = aceDomain.lastIndexOf(QLatin1Char('.')); + auto idx = aceDomain.lastIndexOf(u'.'); if (idx == -1) return false; @@ -396,17 +362,17 @@ static bool qt_is_idn_enabled(QStringView aceDomain) return user_idn_whitelist->contains(tldString); int l = 0; - int r = idn_whitelist_size - 1; + int r = idn_whitelist.count() - 1; int i = (l + r + 1) / 2; while (r != l) { - if (lessThan(tld, len, idn_whitelist[i])) + if (lessThan(tld, len, idn_whitelist.at(i))) r = i - 1; else l = i; i = (l + r + 1) / 2; } - return equal(tld, len, idn_whitelist[i]); + return equal(tld, len, idn_whitelist.at(i)); } template<typename C> @@ -457,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio if (uc >= U'A' && uc <= U'Z') uc |= 0x20; // lower-case it - if (!isValidInNormalizedAsciiName(uc)) - return {}; + if (isValidInNormalizedAsciiName(uc)) { + result.append(static_cast<char16_t>(uc)); + continue; + } + } - result.append(static_cast<char16_t>(uc)); + allAscii = false; + + // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1) + if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) { + result.append(u"ss"_s); continue; } - allAscii = false; QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc); @@ -476,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio case QUnicodeTables::IdnaStatus::Ignored: continue; case QUnicodeTables::IdnaStatus::Valid: + case QUnicodeTables::IdnaStatus::Disallowed: for (auto c : QChar::fromUcs4(uc)) result.append(c); break; case QUnicodeTables::IdnaStatus::Mapped: result.append(QUnicodeTables::idnaMapping(uc)); break; - case QUnicodeTables::IdnaStatus::Disallowed: - return {}; default: Q_UNREACHABLE(); } @@ -502,7 +473,7 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio */ static bool validateAsciiLabel(QStringView label) { - if (label.length() > MaxDomainLabelLength) + if (label.size() > MaxDomainLabelLength) return false; if (label.first() == u'-' || label.last() == u'-') @@ -517,12 +488,13 @@ class DomainValidityChecker { bool domainNameIsBidi = false; bool hadBidiErrors = false; + bool ignoreBidiErrors; static constexpr char32_t ZWNJ = U'\u200C'; static constexpr char32_t ZWJ = U'\u200D'; public: - DomainValidityChecker() { } + DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { } bool checkLabel(const QString &label, QUrl::AceProcessingOptions options); private: @@ -743,18 +715,18 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing if (label != label.normalized(QString::NormalizationForm_C)) return false; - if (label.length() >= 4) { + if (label.size() >= 4) { // This assumes that the first two characters are in BMP, but that's ok // because non-BMP characters are unlikely to be used for specifying // future extensions. - if (label[2] == QLatin1Char('-') && label[3] == QLatin1Char('-')) - return false; + if (label[2] == u'-' && label[3] == u'-') + return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label); } - if (label.startsWith(QLatin1Char('-')) || label.endsWith(QLatin1Char('-'))) + if (label.startsWith(u'-') || label.endsWith(u'-')) return false; - if (label.contains(QLatin1Char('.'))) + if (label.contains(u'.')) return false; QStringIterator iter(label); @@ -770,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing for (;;) { hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ; - if (!domainNameIsBidi) { + if (!ignoreBidiErrors && !domainNameIsBidi) { switch (QChar::direction(c)) { case QChar::DirR: case QChar::DirAL: @@ -811,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing return true; } -static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot) +static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot) { qsizetype lastIdx = 0; QString aceForm; // this variable is here for caching QString aceResult; while (true) { - auto idx = normalizedDomain.indexOf(u'.', lastIdx); + qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx); if (idx == -1) idx = normalizedDomain.size(); - const auto labelLength = idx - lastIdx; - if (labelLength == 0) { - if (idx == normalizedDomain.size()) - break; - if (dot == ForbidLeadingDot || idx > 0) - return {}; // two delimiters in a row -- empty label not allowed - } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = normalizedDomain.sliced(lastIdx, labelLength); aceForm.clear(); qt_punycodeEncoder(label, &aceForm); if (aceForm.isEmpty()) @@ -841,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot if (idx == normalizedDomain.size()) break; + if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0)) + return {}; // two delimiters in a row -- empty label not allowed + lastIdx = idx + 1; aceResult += u'.'; } @@ -848,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot return aceResult; } -static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot, +static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot, bool *usesPunycode) { qsizetype lastIdx = 0; @@ -867,11 +837,11 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot if (dot == ForbidLeadingDot || idx > 0) return false; // two delimiters in a row -- empty label not allowed } else { - const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength); + const auto label = normalizedDomain.sliced(lastIdx, labelLength); if (!validateAsciiLabel(label)) return false; - hasPunycode = hasPunycode || label.startsWith(QLatin1String("xn--")); + hasPunycode = hasPunycode || label.startsWith("xn--"_L1); } lastIdx = idx + 1; @@ -920,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO return result; } +static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options) +{ + qsizetype lastIdx = 0; + + DomainValidityChecker checker(true); + + while (true) { + qsizetype idx = domainName.indexOf(u'.', lastIdx); + if (idx == -1) + idx = domainName.size(); + + const qsizetype labelLength = idx - lastIdx; + if (labelLength) { + const auto label = domainName.sliced(lastIdx, labelLength); + + if (!checker.checkLabel(label, options)) + return false; + } + + if (idx == domainName.size()) + break; + + lastIdx = idx + 1; + } + return true; +} + QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options) { @@ -934,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, if (normalized.isEmpty()) return {}; - bool needsCoversionToUnicode; + if (!mappedToAscii && !checkUnicodeName(normalized, options)) + return {}; + + bool needsConversionToUnicode; const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot); - if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode)) + if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode)) return {}; - if (op == ToAceOnly || !needsCoversionToUnicode + if (op == ToAceOnly || !needsConversionToUnicode || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) { return aceResult; } @@ -963,10 +963,10 @@ QStringList QUrl::idnWhitelist() return *user_idn_whitelist; static const QStringList list = [] { QStringList list; - list.reserve(idn_whitelist_size); - unsigned int i = 0; - while (i < idn_whitelist_size) { - list << QLatin1String(idn_whitelist[i]); + list.reserve(idn_whitelist.count()); + int i = 0; + while (i < idn_whitelist.count()) { + list << QLatin1StringView(idn_whitelist.at(i)); ++i; } return list; |