summaryrefslogtreecommitdiffstats
path: root/src/corelib/io/qurl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/io/qurl.cpp')
-rw-r--r--src/corelib/io/qurl.cpp318
1 files changed, 217 insertions, 101 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp
index 180e9ec04..79cd2f03e 100644
--- a/src/corelib/io/qurl.cpp
+++ b/src/corelib/io/qurl.cpp
@@ -2336,12 +2336,12 @@ static const NameprepCaseFoldingEntry NameprepCaseFolding[] = {
{ 0x1D7BB, { 0x03C3, 0x0000, 0x0000, 0x0000 } }
};
-static void mapToLowerCase(QString *str)
+static void mapToLowerCase(QString *str, int from)
{
int N = sizeof(NameprepCaseFolding) / sizeof(NameprepCaseFolding[0]);
QChar *d = 0;
- for (int i = 0; i < str->size(); ++i) {
+ for (int i = from; i < str->size(); ++i) {
int uc = str->at(i).unicode();
if (uc < 0x80) {
if (uc <= 'Z' && uc >= 'A') {
@@ -2388,11 +2388,11 @@ static bool isMappedToNothing(const QChar &ch)
}
-static void stripProhibitedOutput(QString *str)
+static void stripProhibitedOutput(QString *str, int from)
{
- ushort *out = (ushort *)str->data();
+ ushort *out = (ushort *)str->data() + from;
const ushort *in = out;
- const ushort *end = out + str->size();
+ const ushort *end = (ushort *)str->data() + str->size();
while (in < end) {
ushort uc = *in;
if (uc < 0x80 ||
@@ -2901,66 +2901,99 @@ static bool isBidirectionalL(const QChar &ch)
return false;
}
+#ifdef QT_BUILD_INTERNAL
+// export for tst_qurl.cpp
+Q_AUTOTEST_EXPORT void qt_nameprep(QString *source, int from);
+Q_AUTOTEST_EXPORT bool qt_check_std3rules(const QChar *uc, int len);
+#else
+// non-test build, keep the symbols for ourselves
+static void qt_nameprep(QString *source, int from);
+static bool qt_check_std3rules(const QChar *uc, int len);
+#endif
-Q_AUTOTEST_EXPORT QString qt_nameprep(const QString &source)
+void qt_nameprep(QString *source, int from)
{
- QString mapped = source;
-
- bool simple = true;
- for (int i = 0; i < mapped.size(); ++i) {
- ushort uc = mapped.at(i).unicode();
+ QChar *src = source->data(); // causes a detach, so we're sure the only one using it
+ QChar *out = src + from;
+ const QChar *e = src + source->size();
+
+ for ( ; out < e; ++out) {
+ register ushort uc = out->unicode();
if (uc > 0x80) {
- simple = false;
break;
} else if (uc >= 'A' && uc <= 'Z') {
- mapped[i] = QChar(uc | 0x20);
+ *out = QChar(uc | 0x20);
}
}
- if (simple)
- return mapped;
-
+ if (out == e)
+ return; // everything was mapped easily (lowercased, actually)
+ int firstNonAscii = out - src;
+
// Characters commonly mapped to nothing are simply removed
// (Table B.1)
- QChar *out = mapped.data();
const QChar *in = out;
- const QChar *e = in + mapped.size();
while (in < e) {
if (!isMappedToNothing(*in))
*out++ = *in;
++in;
}
if (out != in)
- mapped.truncate(out - mapped.constData());
+ source->truncate(out - src);
// Map to lowercase (Table B.2)
- mapToLowerCase(&mapped);
+ mapToLowerCase(source, firstNonAscii);
// Normalize to Unicode 3.2 form KC
- mapped = mapped.normalized(QString::NormalizationForm_KC, QChar::Unicode_3_2);
+ extern void qt_string_normalize(QString *data, QString::NormalizationForm mode,
+ QChar::UnicodeVersion version, int from);
+ qt_string_normalize(source, QString::NormalizationForm_KC, QChar::Unicode_3_2, firstNonAscii);
// Strip prohibited output
- stripProhibitedOutput(&mapped);
+ stripProhibitedOutput(source, firstNonAscii);
// Check for valid bidirectional characters
bool containsLCat = false;
bool containsRandALCat = false;
- for (int j = 0; j < mapped.size() && (!containsLCat || !containsRandALCat); ++j) {
- if (isBidirectionalL(mapped.at(j)))
+ src = source->data();
+ e = src + source->size();
+ for (in = src + from; in < e && (!containsLCat || !containsRandALCat); ++in) {
+ if (isBidirectionalL(*in))
containsLCat = true;
- else if (isBidirectionalRorAL(mapped.at(j)))
+ else if (isBidirectionalRorAL(*in))
containsRandALCat = true;
}
if (containsRandALCat) {
- if (containsLCat || (!isBidirectionalRorAL(mapped.at(0))
- || !isBidirectionalRorAL(mapped.at(mapped.size() - 1))))
- mapped.clear();
+ if (containsLCat || (!isBidirectionalRorAL(src[from])
+ || !isBidirectionalRorAL(e[-1])))
+ source->resize(from); // not allowed, clear the label
}
+}
+
+bool qt_check_std3rules(const QChar *uc, int len)
+{
+ if (len > 63)
+ return false;
- return mapped;
+ for (int i = 0; i < len; ++i) {
+ register ushort c = uc[i].unicode();
+ if (c == '-' && (i == 0 || i == len - 1))
+ return false;
+
+ // verifying the absence of LDH is the same as verifying that
+ // only LDH is present
+ if (c == '-' || (c >= '0' && c <= '9')
+ || (c >= 'A' && c <= 'Z')
+ || (c >= 'a' && c <= 'z'))
+ continue;
+
+ return false;
+ }
+
+ return true;
}
-static inline char encodeDigit(uint digit)
+static inline uint encodeDigit(uint digit)
{
return digit + 22 + 75 * (digit < 26);
}
@@ -2977,7 +3010,7 @@ static inline uint adapt(uint delta, uint numpoints, bool firsttime)
return k + (((base - tmin + 1) * delta) / (delta + skew));
}
-static inline void appendEncode(QByteArray* output, uint& delta, uint& bias, uint& b, uint& h)
+static inline void appendEncode(QString* output, uint& delta, uint& bias, uint& b, uint& h)
{
uint qq;
uint k;
@@ -2991,17 +3024,17 @@ static inline void appendEncode(QByteArray* output, uint& delta, uint& bias, uin
t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
if (qq < t) break;
- *output += encodeDigit(t + (qq - t) % (base - t));
+ *output += QChar(encodeDigit(t + (qq - t) % (base - t)));
qq = (qq - t) / (base - t);
}
- *output += encodeDigit(qq);
+ *output += QChar(encodeDigit(qq));
bias = adapt(delta, h + 1, h == b);
delta = 0;
++h;
}
-static void toPunycodeHelper(const QChar *s, int ucLength, QByteArray *output)
+static void toPunycodeHelper(const QChar *s, int ucLength, QString *output)
{
uint n = initial_n;
uint delta = 0;
@@ -3010,7 +3043,7 @@ static void toPunycodeHelper(const QChar *s, int ucLength, QByteArray *output)
int outLen = output->length();
output->resize(outLen + ucLength);
- char *d = output->data() + outLen;
+ QChar *d = output->data() + outLen;
bool skipped = false;
// copy all basic code points verbatim to output.
for (uint j = 0; j < (uint) ucLength; ++j) {
@@ -3035,7 +3068,7 @@ static void toPunycodeHelper(const QChar *s, int ucLength, QByteArray *output)
// if basic code points were copied, add the delimiter character.
if (h > 0)
- *output += 0x2d;
+ *output += QChar(0x2d);
// while there are still unprocessed non-basic code points left in
// the input string...
@@ -3083,7 +3116,7 @@ static void toPunycodeHelper(const QChar *s, int ucLength, QByteArray *output)
}
// prepend ACE prefix
- output->insert(outLen, "xn--");
+ output->insert(outLen, QLatin1String("xn--"));
return;
}
@@ -3144,11 +3177,15 @@ static bool qt_is_idn_enabled(const QString &domain)
int idx = domain.lastIndexOf(QLatin1Char('.'));
if (idx == -1)
return false;
- const QChar *tld = domain.constData() + idx + 1;
+
int len = domain.size() - idx - 1;
+ QString tldString(domain.constData() + idx + 1, len);
+ qt_nameprep(&tldString, 0);
+
+ const QChar *tld = tldString.constData();
if (user_idn_whitelist)
- return user_idn_whitelist->contains(QString(tld, len));
+ return user_idn_whitelist->contains(tldString);
int l = 0;
int r = sizeof(idn_whitelist)/sizeof(const char *) - 1;
@@ -3164,46 +3201,127 @@ static bool qt_is_idn_enabled(const QString &domain)
return equal(tld, len, idn_whitelist[i]);
}
-static QString qt_from_ACE(const QString &domainMC)
+static inline bool isDotDelimiter(ushort uc)
{
- QString domain = domainMC.toLower();
- int idx = domain.indexOf(QLatin1Char('.'));
- if (idx != -1) {
- if (!domain.contains(QLatin1String("xn--"))) {
- bool simple = true;
- for (int i = 0; i < domain.size(); ++i) {
- ushort ch = domain.at(i).unicode();
- if (ch > 'z' || ch < '-' || ch == '/' || (ch > '9' && ch < 'A') || (ch > 'Z' && ch < 'a')) {
+ // IDNA / rfc3490 describes these four delimiters used for
+ // separating labels in unicode international domain
+ // names.
+ return uc == 0x2e || uc == 0x3002 || uc == 0xff0e || uc == 0xff61;
+}
+
+static int nextDotDelimiter(const QString &domain, int from = 0)
+{
+ const QChar *b = domain.unicode();
+ const QChar *ch = b + from;
+ const QChar *e = b + domain.length();
+ while (ch < e) {
+ if (isDotDelimiter(ch->unicode()))
+ break;
+ else
+ ++ch;
+ }
+ return ch - b;
+}
+
+enum AceOperation { ToAceOnly, NormalizeAce };
+static QString qt_ACE_do(const QString &domain, AceOperation op)
+{
+ if (domain.isEmpty())
+ return domain;
+
+ QString result;
+ result.reserve(domain.length());
+
+ const bool isIdnEnabled = op == NormalizeAce ? qt_is_idn_enabled(domain) : false;
+ int lastIdx = 0;
+ QString aceForm; // this variable is here for caching
+
+ while (1) {
+ int idx = nextDotDelimiter(domain, lastIdx);
+ int labelLength = idx - lastIdx;
+ if (labelLength == 0)
+ return QString(); // two delimiters in a row -- empty label not allowed
+
+ // RFC 3490 says, about the ToASCII operation:
+ // 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
+ //
+ // (a) Verify the absence of non-LDH ASCII code points; that is, the
+ // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
+ //
+ // (b) Verify the absence of leading and trailing hyphen-minus; that
+ // is, the absence of U+002D at the beginning and end of the
+ // sequence.
+ // and:
+ // 8. Verify that the number of code points is in the range 1 to 63
+ // inclusive.
+
+ // copy the label to the destination, which also serves as our scratch area, lowercasing it
+ int prevLen = result.size();
+ bool simple = true;
+ result.resize(prevLen + labelLength);
+ {
+ QChar *out = result.data() + prevLen;
+ const QChar *in = domain.constData() + lastIdx;
+ const QChar *e = in + labelLength;
+ for (; in < e; ++in, ++out) {
+ register ushort uc = in->unicode();
+ if (uc > 0x7f)
simple = false;
- break;
- }
+ if (uc >= 'A' && uc <= 'Z')
+ *out = QChar(uc | 0x20);
+ else
+ *out = *in;
}
- if (simple)
- return domain;
}
-
- const bool isIdnEnabled = qt_is_idn_enabled(domain);
- int lastIdx = 0;
- QString result;
- while (1) {
- // Nameprep the host. If the labels in the hostname are Punycode
- // encoded, we decode them immediately, then nameprep them.
- QByteArray label;
- toPunycodeHelper(domain.constData() + lastIdx, idx - lastIdx, &label);
- result += qt_nameprep(isIdnEnabled ? QUrl::fromPunycode(label) : QString::fromLatin1(label));
- lastIdx = idx + 1;
- if (lastIdx < domain.size() + 1)
- result += QLatin1Char('.');
- else
- break;
- idx = domain.indexOf(QLatin1Char('.'), lastIdx);
- if (idx == -1)
- idx = domain.size();
+
+ if (simple && labelLength > 6) {
+ // ACE form domains contain only ASCII characters, but we can't consider them simple
+ // is this an ACE form?
+ // the shortest valid ACE domain is 6 characters long (U+0080 would be 1, but it's not allowed)
+ static const ushort acePrefixUtf16[] = { 'x', 'n', '-', '-' };
+ if (memcmp(result.constData() + prevLen, acePrefixUtf16, sizeof acePrefixUtf16) == 0)
+ simple = false;
}
- return result;
- } else {
- return qt_nameprep(domain);
+
+ if (simple) {
+ // fastest case: this is the common case (non IDN-domains)
+ // so we're done
+ if (!qt_check_std3rules(result.constData() + prevLen, labelLength))
+ return QString();
+ } else {
+ // Punycode encoding and decoding cannot be done in-place
+ // That means we need one or two temporaries
+ qt_nameprep(&result, prevLen);
+ labelLength = result.length() - prevLen;
+ register int toReserve = labelLength + 4 + 6; // "xn--" plus some extra bytes
+ if (toReserve > aceForm.capacity())
+ aceForm.reserve(toReserve);
+ toPunycodeHelper(result.constData() + prevLen, result.size() - prevLen, &aceForm);
+
+ // We use resize()+memcpy() here because we're overwriting the data we've copied
+ if (isIdnEnabled) {
+ QString tmp = QUrl::fromPunycode(aceForm.toLatin1());
+ if (tmp.isEmpty())
+ return QString(); // shouldn't happen, since we've just punycode-encoded it
+ result.resize(prevLen + tmp.size());
+ memcpy(result.data() + prevLen, tmp.constData(), tmp.size() * sizeof(QChar));
+ } else {
+ result.resize(prevLen + aceForm.size());
+ memcpy(result.data() + prevLen, aceForm.constData(), aceForm.size() * sizeof(QChar));
+ }
+
+ if (!qt_check_std3rules(aceForm.constData(), aceForm.size()))
+ return QString();
+ }
+
+
+ lastIdx = idx + 1;
+ if (lastIdx < domain.size() + 1)
+ result += QLatin1Char('.');
+ else
+ break;
}
+ return result;
}
@@ -3246,12 +3364,27 @@ QUrlPrivate::QUrlPrivate(const QUrlPrivate &copy)
QString QUrlPrivate::canonicalHost() const
{
- if (QURL_HASFLAG(stateFlags, HostCanonicalized))
+ if (QURL_HASFLAG(stateFlags, HostCanonicalized) || host.isEmpty())
return host;
QUrlPrivate *that = const_cast<QUrlPrivate *>(this);
QURL_SETFLAG(that->stateFlags, HostCanonicalized);
- that->host = qt_from_ACE(host);
+ if (host.contains(QLatin1Char(':'))) {
+ // This is an IP Literal, use _IPLiteral to validate
+ QByteArray ba = host.toLatin1();
+ if (!ba.startsWith('[')) {
+ // surround the IP Literal with [ ] if it's not already done so
+ ba.reserve(ba.length() + 2);
+ ba.prepend('[');
+ ba.append(']');
+ }
+
+ const char *ptr = ba.constData();
+ if (!_IPLiteral(&ptr))
+ that->host.clear();
+ } else {
+ that->host = qt_ACE_do(host, NormalizeAce);
+ }
return that->host;
}
@@ -3737,7 +3870,10 @@ QByteArray QUrlPrivate::toEncoded(QUrl::FormattingOptions options) const
}
}
- url += QUrl::toAce(host);
+ if (host.startsWith(QLatin1Char('[')))
+ url += host.toLatin1();
+ else
+ url += QUrl::toAce(host);
if (!(options & QUrl::RemovePort) && port != -1) {
url += ':';
url += QString::number(port).toAscii();
@@ -4412,8 +4548,6 @@ void QUrl::setHost(const QString &host)
QURL_UNSETFLAG(d->stateFlags, QUrlPrivate::Validated | QUrlPrivate::Normalized | QUrlPrivate::HostCanonicalized);
d->host = host;
- if (d->host.contains(QLatin1Char(':')))
- d->host = QLatin1Char('[') + d->host + QLatin1Char(']');
}
/*!
@@ -5425,9 +5559,9 @@ QByteArray QUrl::toPercentEncoding(const QString &input, const QByteArray &exclu
*/
QByteArray QUrl::toPunycode(const QString &uc)
{
- QByteArray output;
+ QString output;
toPunycodeHelper(uc.constData(), uc.size(), &output);
- return output;
+ return output.toLatin1();
}
/*!
@@ -5528,7 +5662,7 @@ QString QUrl::fromPunycode(const QByteArray &pc)
*/
QString QUrl::fromAce(const QByteArray &domain)
{
- return qt_from_ACE(QString::fromLatin1(domain));
+ return qt_ACE_do(QString::fromLatin1(domain), NormalizeAce);
}
/*!
@@ -5545,26 +5679,8 @@ QString QUrl::fromAce(const QByteArray &domain)
*/
QByteArray QUrl::toAce(const QString &domain)
{
- // IDNA / rfc3490 describes these four delimiters used for
- // separating labels in unicode international domain
- // names.
- QString nameprepped = qt_nameprep(domain);
- int lastIdx = 0;
- QByteArray result;
- for (int i = 0; i < nameprepped.size(); ++i) {
- ushort uc = nameprepped.at(i).unicode();
- if (uc == 0x2e || uc == 0x3002 || uc == 0xff0e || uc == 0xff61) {
- if (lastIdx)
- result += '.';
- toPunycodeHelper(nameprepped.constData() + lastIdx, i - lastIdx, &result);
- lastIdx = i + 1;
- }
- }
- if (lastIdx)
- result += '.';
- toPunycodeHelper(nameprepped.constData() + lastIdx, nameprepped.size() - lastIdx, &result);
-
- return result;
+ QString result = qt_ACE_do(domain, ToAceOnly);
+ return result.toLatin1();
}
/*!