summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/io/qurlidna.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- src/corelib/io/qurlidna.cpp 92
1 files changed, 63 insertions, 29 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp
index da592eb81b..a2a81c7605 100644
--- a/src/corelib/io/qurlidna.cpp
+++ b/src/corelib/io/qurlidna.cpp
@@ -132,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// delta = delta + (m - n) * (h + 1), fail on overflow
uint tmp;
- if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) {
+ if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -144,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// increase delta until we reach the character processed in this iteration;
// fail if delta overflows.
if (c < n) {
- if (add_overflow<uint>(delta, 1, &delta)) {
+ if (qAddOverflow<uint>(delta, 1, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -219,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// i = i + digit * w, fail on overflow
uint tmp;
- if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i))
+ if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
return QString();
// detect threshold to stop reading delta digits
@@ -231,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
if (digit < t) break;
// w = w * (base - t), fail on overflow
- if (mul_overflow<uint>(w, base - t, &w))
+ if (qMulOverflow<uint>(w, base - t, &w))
return QString();
}
@@ -241,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
bias = adapt(i - oldi, outputLength + 1, oldi == 0);
// n = n + i div (length(output) + 1), fail on overflow
- if (add_overflow<uint>(n, i / (outputLength + 1), &n))
+ if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
return QString();
// allow the deltas to wrap around
@@ -423,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
if (uc >= U'A' && uc <= U'Z')
uc |= 0x20; // lower-case it
- if (!isValidInNormalizedAsciiName(uc))
- return {};
+ if (isValidInNormalizedAsciiName(uc)) {
+ result.append(static_cast<char16_t>(uc));
+ continue;
+ }
+ }
+
+ allAscii = false;
- result.append(static_cast<char16_t>(uc));
+ // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1)
+ if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) {
+ result.append(u"ss"_s);
continue;
}
- allAscii = false;
QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc);
@@ -442,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
case QUnicodeTables::IdnaStatus::Ignored:
continue;
case QUnicodeTables::IdnaStatus::Valid:
+ case QUnicodeTables::IdnaStatus::Disallowed:
for (auto c : QChar::fromUcs4(uc))
result.append(c);
break;
case QUnicodeTables::IdnaStatus::Mapped:
result.append(QUnicodeTables::idnaMapping(uc));
break;
- case QUnicodeTables::IdnaStatus::Disallowed:
- return {};
default:
Q_UNREACHABLE();
}
@@ -483,12 +488,13 @@ class DomainValidityChecker
{
bool domainNameIsBidi = false;
bool hadBidiErrors = false;
+ bool ignoreBidiErrors;
static constexpr char32_t ZWNJ = U'\u200C';
static constexpr char32_t ZWJ = U'\u200D';
public:
- DomainValidityChecker() { }
+ DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { }
bool checkLabel(const QString &label, QUrl::AceProcessingOptions options);
private:
@@ -714,7 +720,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
// because non-BMP characters are unlikely to be used for specifying
// future extensions.
if (label[2] == u'-' && label[3] == u'-')
- return false;
+ return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label);
}
if (label.startsWith(u'-') || label.endsWith(u'-'))
@@ -736,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
for (;;) {
hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
- if (!domainNameIsBidi) {
+ if (!ignoreBidiErrors && !domainNameIsBidi) {
switch (QChar::direction(c)) {
case QChar::DirR:
case QChar::DirAL:
@@ -777,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
return true;
}
-static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot)
+static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot)
{
qsizetype lastIdx = 0;
QString aceForm; // this variable is here for caching
QString aceResult;
while (true) {
- auto idx = normalizedDomain.indexOf(u'.', lastIdx);
+ qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);
if (idx == -1)
idx = normalizedDomain.size();
- const auto labelLength = idx - lastIdx;
- if (labelLength == 0) {
- if (idx == normalizedDomain.size())
- break;
- if (dot == ForbidLeadingDot || idx > 0)
- return {}; // two delimiters in a row -- empty label not allowed
- } else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) {
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
aceForm.clear();
qt_punycodeEncoder(label, &aceForm);
if (aceForm.isEmpty())
@@ -807,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
if (idx == normalizedDomain.size())
break;
+ if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0))
+ return {}; // two delimiters in a row -- empty label not allowed
+
lastIdx = idx + 1;
aceResult += u'.';
}
@@ -814,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
return aceResult;
}
-static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot,
+static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot,
bool *usesPunycode)
{
qsizetype lastIdx = 0;
@@ -833,7 +837,7 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot
if (dot == ForbidLeadingDot || idx > 0)
return false; // two delimiters in a row -- empty label not allowed
} else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
if (!validateAsciiLabel(label))
return false;
@@ -886,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO
return result;
}
+static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options)
+{
+ qsizetype lastIdx = 0;
+
+ DomainValidityChecker checker(true);
+
+ while (true) {
+ qsizetype idx = domainName.indexOf(u'.', lastIdx);
+ if (idx == -1)
+ idx = domainName.size();
+
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) {
+ const auto label = domainName.sliced(lastIdx, labelLength);
+
+ if (!checker.checkLabel(label, options))
+ return false;
+ }
+
+ if (idx == domainName.size())
+ break;
+
+ lastIdx = idx + 1;
+ }
+ return true;
+}
+
QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
QUrl::AceProcessingOptions options)
{
@@ -900,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
if (normalized.isEmpty())
return {};
- bool needsCoversionToUnicode;
+ if (!mappedToAscii && !checkUnicodeName(normalized, options))
+ return {};
+
+ bool needsConversionToUnicode;
const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
- if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode))
+ if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))
return {};
- if (op == ToAceOnly || !needsCoversionToUnicode
+ if (op == ToAceOnly || !needsConversionToUnicode
|| (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
return aceResult;
}