summary refs log tree commit diff stats
path: root/src/corelib/io/qurlidna.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- src/corelib/io/qurlidna.cpp 196
1 files changed, 98 insertions, 98 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp
index b3e67fecf7..a2a81c7605 100644
--- a/src/corelib/io/qurlidna.cpp
+++ b/src/corelib/io/qurlidna.cpp
@@ -1,47 +1,12 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// Copyright (C) 2016 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qurl_p.h"
#include <QtCore/qstringlist.h>
#include <QtCore/private/qnumeric_p.h>
+#include <QtCore/private/qoffsetstringarray_p.h>
#include <QtCore/private/qstringiterator_p.h>
#include <QtCore/private/qunicodetables_p.h>
@@ -49,6 +14,8 @@
QT_BEGIN_NAMESPACE
+using namespace Qt::StringLiterals;
+
// needed by the punycode encoder/decoder
static const uint base = 36;
static const uint tmin = 1;
@@ -106,11 +73,11 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// Do not try to encode strings that certainly will result in output
// that is longer than allowable domain name label length. Note that
// non-BMP codepoints are encoded as two QChars.
- if (in.length() > MaxDomainLabelLength * 2)
+ if (in.size() > MaxDomainLabelLength * 2)
return;
- int outLen = output->length();
- output->resize(outLen + in.length());
+ int outLen = output->size();
+ output->resize(outLen + in.size());
QChar *d = output->data() + outLen;
bool skipped = false;
@@ -136,7 +103,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// if basic code points were copied, add the delimiter character.
if (h > 0)
- *output += QLatin1Char{'-'};
+ *output += u'-';
// compute the input length in Unicode code points.
uint inputLength = 0;
@@ -165,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// delta = delta + (m - n) * (h + 1), fail on overflow
uint tmp;
- if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) {
+ if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -177,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// increase delta until we reach the character processed in this iteration;
// fail if delta overflows.
if (c < n) {
- if (add_overflow<uint>(delta, 1, &delta)) {
+ if (qAddOverflow<uint>(delta, 1, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -197,7 +164,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
}
// prepend ACE prefix
- output->insert(outLen, QLatin1String("xn--"));
+ output->insert(outLen, "xn--"_L1);
return;
}
@@ -210,17 +177,17 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// Do not try to decode strings longer than allowable for a domain label.
// Non-ASCII strings are not allowed here anyway, so there is no need
// to account for surrogates.
- if (pc.length() > MaxDomainLabelLength)
+ if (pc.size() > MaxDomainLabelLength)
return QString();
// strip any ACE prefix
- int start = pc.startsWith(QLatin1String("xn--")) ? 4 : 0;
+ int start = pc.startsWith("xn--"_L1) ? 4 : 0;
if (!start)
return pc;
// find the last delimiter character '-' in the input array. copy
// all data before this delimiter directly to the output array.
- int delimiterPos = pc.lastIndexOf(QLatin1Char{'-'});
+ int delimiterPos = pc.lastIndexOf(u'-');
auto output = delimiterPos < 4 ? std::u32string()
: pc.mid(start, delimiterPos - start).toStdU32String();
@@ -252,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// i = i + digit * w, fail on overflow
uint tmp;
- if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i))
+ if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
return QString();
// detect threshold to stop reading delta digits
@@ -264,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
if (digit < t) break;
// w = w * (base - t), fail on overflow
- if (mul_overflow<uint>(w, base - t, &w))
+ if (qMulOverflow<uint>(w, base - t, &w))
return QString();
}
@@ -274,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
bias = adapt(i - oldi, outputLength + 1, oldi == 0);
// n = n + i div (length(output) + 1), fail on overflow
- if (add_overflow<uint>(n, i / (outputLength + 1), &n))
+ if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
return QString();
// allow the deltas to wrap around
@@ -310,7 +277,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
return QString::fromStdU32String(output);
}
-static const char * const idn_whitelist[] = {
+static constexpr auto idn_whitelist = qOffsetStringArray(
"ac", "ar", "asia", "at",
"biz", "br",
"cat", "ch", "cl", "cn", "com",
@@ -347,26 +314,25 @@ static const char * const idn_whitelist[] = {
"xn--wgbh1c", // Egypt
"xn--wgbl6a", // Qatar
"xn--xkc2al3hye2a" // Sri Lanka
-};
-static const size_t idn_whitelist_size = sizeof idn_whitelist / sizeof *idn_whitelist;
+);
-static QStringList *user_idn_whitelist = nullptr;
+Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr;
static bool lessThan(const QChar *a, int l, const char *c)
{
- const ushort *uc = (const ushort *)a;
- const ushort *e = uc + l;
+ const auto *uc = reinterpret_cast<const char16_t *>(a);
+ const char16_t *e = uc + l;
if (!c || *c == 0)
return false;
while (*c) {
- if (uc == e || *uc != *c)
+ if (uc == e || *uc != static_cast<unsigned char>(*c))
break;
++uc;
++c;
}
- return (uc == e ? *c : *uc < *c);
+ return uc == e ? *c : (*uc < static_cast<unsigned char>(*c));
}
static bool equal(const QChar *a, int l, const char *b)
@@ -383,7 +349,7 @@ static bool equal(const QChar *a, int l, const char *b)
static bool qt_is_idn_enabled(QStringView aceDomain)
{
- auto idx = aceDomain.lastIndexOf(QLatin1Char('.'));
+ auto idx = aceDomain.lastIndexOf(u'.');
if (idx == -1)
return false;
@@ -396,17 +362,17 @@ static bool qt_is_idn_enabled(QStringView aceDomain)
return user_idn_whitelist->contains(tldString);
int l = 0;
- int r = idn_whitelist_size - 1;
+ int r = idn_whitelist.count() - 1;
int i = (l + r + 1) / 2;
while (r != l) {
- if (lessThan(tld, len, idn_whitelist[i]))
+ if (lessThan(tld, len, idn_whitelist.at(i)))
r = i - 1;
else
l = i;
i = (l + r + 1) / 2;
}
- return equal(tld, len, idn_whitelist[i]);
+ return equal(tld, len, idn_whitelist.at(i));
}
template<typename C>
@@ -457,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
if (uc >= U'A' && uc <= U'Z')
uc |= 0x20; // lower-case it
- if (!isValidInNormalizedAsciiName(uc))
- return {};
+ if (isValidInNormalizedAsciiName(uc)) {
+ result.append(static_cast<char16_t>(uc));
+ continue;
+ }
+ }
- result.append(static_cast<char16_t>(uc));
+ allAscii = false;
+
+ // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1)
+ if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) {
+ result.append(u"ss"_s);
continue;
}
- allAscii = false;
QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc);
@@ -476,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
case QUnicodeTables::IdnaStatus::Ignored:
continue;
case QUnicodeTables::IdnaStatus::Valid:
+ case QUnicodeTables::IdnaStatus::Disallowed:
for (auto c : QChar::fromUcs4(uc))
result.append(c);
break;
case QUnicodeTables::IdnaStatus::Mapped:
result.append(QUnicodeTables::idnaMapping(uc));
break;
- case QUnicodeTables::IdnaStatus::Disallowed:
- return {};
default:
Q_UNREACHABLE();
}
@@ -502,7 +473,7 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
*/
static bool validateAsciiLabel(QStringView label)
{
- if (label.length() > MaxDomainLabelLength)
+ if (label.size() > MaxDomainLabelLength)
return false;
if (label.first() == u'-' || label.last() == u'-')
@@ -517,12 +488,13 @@ class DomainValidityChecker
{
bool domainNameIsBidi = false;
bool hadBidiErrors = false;
+ bool ignoreBidiErrors;
static constexpr char32_t ZWNJ = U'\u200C';
static constexpr char32_t ZWJ = U'\u200D';
public:
- DomainValidityChecker() { }
+ DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { }
bool checkLabel(const QString &label, QUrl::AceProcessingOptions options);
private:
@@ -743,18 +715,18 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
if (label != label.normalized(QString::NormalizationForm_C))
return false;
- if (label.length() >= 4) {
+ if (label.size() >= 4) {
// This assumes that the first two characters are in BMP, but that's ok
// because non-BMP characters are unlikely to be used for specifying
// future extensions.
- if (label[2] == QLatin1Char('-') && label[3] == QLatin1Char('-'))
- return false;
+ if (label[2] == u'-' && label[3] == u'-')
+ return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label);
}
- if (label.startsWith(QLatin1Char('-')) || label.endsWith(QLatin1Char('-')))
+ if (label.startsWith(u'-') || label.endsWith(u'-'))
return false;
- if (label.contains(QLatin1Char('.')))
+ if (label.contains(u'.'))
return false;
QStringIterator iter(label);
@@ -770,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
for (;;) {
hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
- if (!domainNameIsBidi) {
+ if (!ignoreBidiErrors && !domainNameIsBidi) {
switch (QChar::direction(c)) {
case QChar::DirR:
case QChar::DirAL:
@@ -811,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
return true;
}
-static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot)
+static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot)
{
qsizetype lastIdx = 0;
QString aceForm; // this variable is here for caching
QString aceResult;
while (true) {
- auto idx = normalizedDomain.indexOf(u'.', lastIdx);
+ qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);
if (idx == -1)
idx = normalizedDomain.size();
- const auto labelLength = idx - lastIdx;
- if (labelLength == 0) {
- if (idx == normalizedDomain.size())
- break;
- if (dot == ForbidLeadingDot || idx > 0)
- return {}; // two delimiters in a row -- empty label not allowed
- } else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) {
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
aceForm.clear();
qt_punycodeEncoder(label, &aceForm);
if (aceForm.isEmpty())
@@ -841,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
if (idx == normalizedDomain.size())
break;
+ if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0))
+ return {}; // two delimiters in a row -- empty label not allowed
+
lastIdx = idx + 1;
aceResult += u'.';
}
@@ -848,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
return aceResult;
}
-static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot,
+static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot,
bool *usesPunycode)
{
qsizetype lastIdx = 0;
@@ -867,11 +837,11 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot
if (dot == ForbidLeadingDot || idx > 0)
return false; // two delimiters in a row -- empty label not allowed
} else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
if (!validateAsciiLabel(label))
return false;
- hasPunycode = hasPunycode || label.startsWith(QLatin1String("xn--"));
+ hasPunycode = hasPunycode || label.startsWith("xn--"_L1);
}
lastIdx = idx + 1;
@@ -920,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO
return result;
}
+static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options)
+{
+ qsizetype lastIdx = 0;
+
+ DomainValidityChecker checker(true);
+
+ while (true) {
+ qsizetype idx = domainName.indexOf(u'.', lastIdx);
+ if (idx == -1)
+ idx = domainName.size();
+
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) {
+ const auto label = domainName.sliced(lastIdx, labelLength);
+
+ if (!checker.checkLabel(label, options))
+ return false;
+ }
+
+ if (idx == domainName.size())
+ break;
+
+ lastIdx = idx + 1;
+ }
+ return true;
+}
+
QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
QUrl::AceProcessingOptions options)
{
@@ -934,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
if (normalized.isEmpty())
return {};
- bool needsCoversionToUnicode;
+ if (!mappedToAscii && !checkUnicodeName(normalized, options))
+ return {};
+
+ bool needsConversionToUnicode;
const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
- if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode))
+ if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))
return {};
- if (op == ToAceOnly || !needsCoversionToUnicode
+ if (op == ToAceOnly || !needsConversionToUnicode
|| (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
return aceResult;
}
@@ -963,10 +963,10 @@ QStringList QUrl::idnWhitelist()
return *user_idn_whitelist;
static const QStringList list = [] {
QStringList list;
- list.reserve(idn_whitelist_size);
- unsigned int i = 0;
- while (i < idn_whitelist_size) {
- list << QLatin1String(idn_whitelist[i]);
+ list.reserve(idn_whitelist.count());
+ int i = 0;
+ while (i < idn_whitelist.count()) {
+ list << QLatin1StringView(idn_whitelist.at(i));
++i;
}
return list;