summary refs log tree commit diff stats
path: root/src/corelib/io/qurlidna.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/io/qurlidna.cpp')
-rw-r--r-- src/corelib/io/qurlidna.cpp 112
1 files changed, 73 insertions, 39 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp
index 04b9a25886..a2a81c7605 100644
--- a/src/corelib/io/qurlidna.cpp
+++ b/src/corelib/io/qurlidna.cpp
@@ -73,11 +73,11 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// Do not try to encode strings that certainly will result in output
// that is longer than allowable domain name label length. Note that
// non-BMP codepoints are encoded as two QChars.
- if (in.length() > MaxDomainLabelLength * 2)
+ if (in.size() > MaxDomainLabelLength * 2)
return;
- int outLen = output->length();
- output->resize(outLen + in.length());
+ int outLen = output->size();
+ output->resize(outLen + in.size());
QChar *d = output->data() + outLen;
bool skipped = false;
@@ -132,7 +132,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// delta = delta + (m - n) * (h + 1), fail on overflow
uint tmp;
- if (mul_overflow<uint>(m - n, h + 1, &tmp) || add_overflow<uint>(delta, tmp, &delta)) {
+ if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -144,7 +144,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// increase delta until we reach the character processed in this iteration;
// fail if delta overflows.
if (c < n) {
- if (add_overflow<uint>(delta, 1, &delta)) {
+ if (qAddOverflow<uint>(delta, 1, &delta)) {
output->truncate(outLen);
return; // punycode_overflow
}
@@ -177,7 +177,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// Do not try to decode strings longer than allowable for a domain label.
// Non-ASCII strings are not allowed here anyway, so there is no need
// to account for surrogates.
- if (pc.length() > MaxDomainLabelLength)
+ if (pc.size() > MaxDomainLabelLength)
return QString();
// strip any ACE prefix
@@ -219,7 +219,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// i = i + digit * w, fail on overflow
uint tmp;
- if (mul_overflow<uint>(digit, w, &tmp) || add_overflow<uint>(i, tmp, &i))
+ if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
return QString();
// detect threshold to stop reading delta digits
@@ -231,7 +231,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
if (digit < t) break;
// w = w * (base - t), fail on overflow
- if (mul_overflow<uint>(w, base - t, &w))
+ if (qMulOverflow<uint>(w, base - t, &w))
return QString();
}
@@ -241,7 +241,7 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
bias = adapt(i - oldi, outputLength + 1, oldi == 0);
// n = n + i div (length(output) + 1), fail on overflow
- if (add_overflow<uint>(n, i / (outputLength + 1), &n))
+ if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
return QString();
// allow the deltas to wrap around
@@ -320,19 +320,19 @@ Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr;
static bool lessThan(const QChar *a, int l, const char *c)
{
- const ushort *uc = (const ushort *)a;
- const ushort *e = uc + l;
+ const auto *uc = reinterpret_cast<const char16_t *>(a); // walk the QChar array as raw UTF-16 code units
+ const char16_t *e = uc + l; // one past the last of the l code units
if (!c || *c == 0)
return false;
while (*c) {
- if (uc == e || *uc != *c)
+ if (uc == e || *uc != static_cast<unsigned char>(*c)) // compare c's byte as 0..255; a plain (possibly signed) char >= 0x80 would promote to a negative int
break;
++uc;
++c;
}
- return (uc == e ? *c : *uc < *c);
+ return uc == e ? *c : (*uc < static_cast<unsigned char>(*c)); // a exhausted: less only if c still has characters; otherwise order by the first differing unit, again treating c's byte as unsigned
}
static bool equal(const QChar *a, int l, const char *b)
@@ -423,13 +423,19 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
if (uc >= U'A' && uc <= U'Z')
uc |= 0x20; // lower-case it
- if (!isValidInNormalizedAsciiName(uc))
- return {};
+ if (isValidInNormalizedAsciiName(uc)) {
+ result.append(static_cast<char16_t>(uc));
+ continue;
+ }
+ }
+
+ allAscii = false;
- result.append(static_cast<char16_t>(uc));
+ // Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1)
+ if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) {
+ result.append(u"ss"_s);
continue;
}
- allAscii = false;
QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc);
@@ -442,14 +448,13 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
case QUnicodeTables::IdnaStatus::Ignored:
continue;
case QUnicodeTables::IdnaStatus::Valid:
+ case QUnicodeTables::IdnaStatus::Disallowed:
for (auto c : QChar::fromUcs4(uc))
result.append(c);
break;
case QUnicodeTables::IdnaStatus::Mapped:
result.append(QUnicodeTables::idnaMapping(uc));
break;
- case QUnicodeTables::IdnaStatus::Disallowed:
- return {};
default:
Q_UNREACHABLE();
}
@@ -468,7 +473,7 @@ static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions optio
*/
static bool validateAsciiLabel(QStringView label)
{
- if (label.length() > MaxDomainLabelLength)
+ if (label.size() > MaxDomainLabelLength)
return false;
if (label.first() == u'-' || label.last() == u'-')
@@ -483,12 +488,13 @@ class DomainValidityChecker
{
bool domainNameIsBidi = false;
bool hadBidiErrors = false;
+ bool ignoreBidiErrors;
static constexpr char32_t ZWNJ = U'\u200C';
static constexpr char32_t ZWJ = U'\u200D';
public:
- DomainValidityChecker() { }
+ DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { }
bool checkLabel(const QString &label, QUrl::AceProcessingOptions options);
private:
@@ -709,12 +715,12 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
if (label != label.normalized(QString::NormalizationForm_C))
return false;
- if (label.length() >= 4) {
+ if (label.size() >= 4) {
// This assumes that the first two characters are in BMP, but that's ok
// because non-BMP characters are unlikely to be used for specifying
// future extensions.
if (label[2] == u'-' && label[3] == u'-')
- return false;
+ return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label);
}
if (label.startsWith(u'-') || label.endsWith(u'-'))
@@ -736,7 +742,7 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
for (;;) {
hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
- if (!domainNameIsBidi) {
+ if (!ignoreBidiErrors && !domainNameIsBidi) {
switch (QChar::direction(c)) {
case QChar::DirR:
case QChar::DirAL:
@@ -777,25 +783,20 @@ bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessing
return true;
}
-static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot)
+static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot)
{
qsizetype lastIdx = 0;
QString aceForm; // this variable is here for caching
QString aceResult;
while (true) {
- auto idx = normalizedDomain.indexOf(u'.', lastIdx);
+ qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);
if (idx == -1)
idx = normalizedDomain.size();
- const auto labelLength = idx - lastIdx;
- if (labelLength == 0) {
- if (idx == normalizedDomain.size())
- break;
- if (dot == ForbidLeadingDot || idx > 0)
- return {}; // two delimiters in a row -- empty label not allowed
- } else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) {
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
aceForm.clear();
qt_punycodeEncoder(label, &aceForm);
if (aceForm.isEmpty())
@@ -807,6 +808,9 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
if (idx == normalizedDomain.size())
break;
+ if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0))
+ return {}; // two delimiters in a row -- empty label not allowed
+
lastIdx = idx + 1;
aceResult += u'.';
}
@@ -814,7 +818,7 @@ static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot
return aceResult;
}
-static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot,
+static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot,
bool *usesPunycode)
{
qsizetype lastIdx = 0;
@@ -833,7 +837,7 @@ static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot
if (dot == ForbidLeadingDot || idx > 0)
return false; // two delimiters in a row -- empty label not allowed
} else {
- const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
+ const auto label = normalizedDomain.sliced(lastIdx, labelLength);
if (!validateAsciiLabel(label))
return false;
@@ -886,6 +890,33 @@ static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingO
return result;
}
+static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options) // returns true when every non-empty '.'-separated label of domainName passes DomainValidityChecker::checkLabel()
+{
+ qsizetype lastIdx = 0; // start index of the label currently being examined
+
+ DomainValidityChecker checker(true); // true is passed as the constructor's ignoreBidiErrors argument; one checker is shared by all labels
+
+ while (true) {
+ qsizetype idx = domainName.indexOf(u'.', lastIdx); // next label delimiter at or after lastIdx
+ if (idx == -1)
+ idx = domainName.size(); // no further dot: the final label runs to the end of the string
+
+ const qsizetype labelLength = idx - lastIdx;
+ if (labelLength) { // empty labels (leading, trailing or consecutive dots) are skipped here, not rejected
+ const auto label = domainName.sliced(lastIdx, labelLength); // a QString holding just this label, since checkLabel() takes const QString &
+
+ if (!checker.checkLabel(label, options))
+ return false; // the first failing label makes the whole name invalid
+ }
+
+ if (idx == domainName.size())
+ break; // that was the last label
+
+ lastIdx = idx + 1; // continue just past the '.'
+ }
+ return true;
+}
+
QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
QUrl::AceProcessingOptions options)
{
@@ -900,12 +931,15 @@ QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
if (normalized.isEmpty())
return {};
- bool needsCoversionToUnicode;
+ if (!mappedToAscii && !checkUnicodeName(normalized, options))
+ return {};
+
+ bool needsConversionToUnicode;
const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
- if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode))
+ if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))
return {};
- if (op == ToAceOnly || !needsCoversionToUnicode
+ if (op == ToAceOnly || !needsConversionToUnicode
|| (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
return aceResult;
}