summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/corelib/io/qurlidna.cpp53
-rw-r--r--tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp1
2 files changed, 39 insertions, 15 deletions
diff --git a/src/corelib/io/qurlidna.cpp b/src/corelib/io/qurlidna.cpp
index 774903c35b..c1a5220dbb 100644
--- a/src/corelib/io/qurlidna.cpp
+++ b/src/corelib/io/qurlidna.cpp
@@ -2252,15 +2252,27 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
if (h > 0)
*output += QLatin1Char{'-'};
+ // compute the input length in Unicode code points.
+ qsizetype inputLength = 0;
+ for (QStringIterator iter(in); iter.hasNext();) {
+ inputLength++;
+
+ if (iter.next(char32_t(-1)) == char32_t(-1)) {
+ output->truncate(outLen);
+ return; // invalid surrogate pair
+ }
+ }
+
// while there are still unprocessed non-basic code points left in
// the input string...
- while (h < (uint) in.length()) {
+ while (h < inputLength) {
// find the character in the input string with the lowest
// unicode value.
uint m = Q_MAXINT;
- for (QChar c : in) {
- if (c.unicode() >= n && c.unicode() < m)
- m = (uint) c.unicode();
+ for (QStringIterator iter(in); iter.hasNext();) {
+ auto c = iter.nextUnchecked();
+ if (c >= n && c < m)
+ m = c;
}
// reject out-of-bounds unicode characters
@@ -2272,11 +2284,12 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
delta += (m - n) * (h + 1);
n = m;
- for (QChar c : in) {
+ for (QStringIterator iter(in); iter.hasNext();) {
+ auto c = iter.nextUnchecked();
// increase delta until we reach the character with the
// lowest unicode code. fail if delta overflows.
- if (c.unicode() < n) {
+ if (c < n) {
++delta;
if (!delta) {
output->truncate(outLen);
@@ -2286,7 +2299,7 @@ Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
// if j is the index of the character with the lowest
// unicode code...
- if (c.unicode() == n) {
+ if (c == n) {
appendEncode(output, delta, bias, b, h);
}
}
@@ -2314,8 +2327,8 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// find the last delimiter character '-' in the input array. copy
// all data before this delimiter directly to the output array.
int delimiterPos = pc.lastIndexOf(QLatin1Char{'-'});
- QString output = delimiterPos < 4 ?
- QString() : pc.mid(start, delimiterPos - start);
+ auto output = delimiterPos < 4 ? std::u32string()
+ : pc.mid(start, delimiterPos - start).toStdU32String();
// if a delimiter was found, skip to the position after it;
// otherwise start at the front of the input string. everything
@@ -2357,18 +2370,30 @@ Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
// find new bias and calculate the next non-basic code
// character.
- bias = adapt(i - oldi, output.length() + 1, oldi == 0);
- n += i / (output.length() + 1);
+ uint outputLength = static_cast<uint>(output.length());
+ bias = adapt(i - oldi, outputLength + 1, oldi == 0);
+ n += i / (outputLength + 1);
// allow the deltas to wrap around
- i %= (output.length() + 1);
+ i %= (outputLength + 1);
+
+ // Surrogates should normally be rejected later by other IDNA code.
+ // But because of Qt's use of UTF-16 to represent strings the
+ // IDNA code is not able to distinguish characters represented as pairs
+ // of surrogates from normal code points. This is why surrogates are
+ // not allowed here.
+ //
+ // Allowing surrogates would lead to non-unique (after normalization)
+ // encoding of strings with non-BMP characters.
+ if (QChar::isSurrogate(n))
+ return QString();
// insert the character n at position i
- output.insert((uint) i, QChar((ushort) n));
+ output.insert(i, 1, static_cast<char32_t>(n));
++i;
}
- return output;
+ return QString::fromStdU32String(output);
}
static const char * const idn_whitelist[] = {
diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
index f9876fc12e..7f2c506a1a 100644
--- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
+++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
@@ -246,7 +246,6 @@ void tst_QUrlInternal::idna_testsuite()
QString result;
qt_punycodeEncoder(QStringView{unicode.points, numchars}, &result);
- QEXPECT_FAIL("U+102F7", "QTBUG-95577: Non-BMP handling is broken", Continue);
QCOMPARE(result.toLatin1(), punycode);
QCOMPARE(qt_punycodeDecoder(result), QString::fromUtf16(unicode.points, numchars));
}