summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2012-05-21 20:21:16 +0200
committerQt by Nokia <qt-info@nokia.com>2012-05-22 20:56:38 +0200
commit1ca791faf5b89c35b4b39d443d2118a882c3946b (patch)
treee11be9b68338786b61d12323f58e998211f89121 /src
parent53d0624403f7f2ac8fe8364a7c5cd136717d40ed (diff)
Add the QUrl::FullyDecoded flag to the component formatting
This allows the QUrl component getters to return fully decoded data, like they did in Qt 4. This is necessary for some use-cases where the component like the user name, password or path are used outside the context of a URL. In those contexts, the percent-encoded data makes no sense, and the loss of data of what could be represented in a URL is acceptable. Also take the opportunity to expand the documentation of those getter methods, explaining what the options argument does. Discussed-on: http://lists.qt-project.org/pipermail/development/2012-May/003811.html Change-Id: I89f743cde78c02f169c88314bff0768714341419 Reviewed-by: Lars Knoll <lars.knoll@nokia.com> Reviewed-by: David Faure <faure@kde.org> Reviewed-by: Shane Kearns <shane.kearns@accenture.com>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/io/qurl.cpp186
-rw-r--r--src/corelib/io/qurl.h3
-rw-r--r--src/corelib/io/qurlrecode.cpp47
3 files changed, 193 insertions, 43 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp
index a7ceb7337f..6e17740869 100644
--- a/src/corelib/io/qurl.cpp
+++ b/src/corelib/io/qurl.cpp
@@ -242,11 +242,17 @@
URL in QString form, exactly equal to the result of
toEncoded()
- \value MostDecoded Attempt to decode as much as possible. For individual
+ \value FullyDecoded Attempt to decode as much as possible. For individual
components of the URL, this decodes every percent
- encoding sequence, control characters (U+0000 to U+001F)
- and non-US-ASCII sequences that aren't valid UTF-8
- sequences.
+ encoding sequence, including control characters (U+0000
+ to U+001F) and UTF-8 sequences found in percent-encoded form.
+ Note: if the component contains non-US-ASCII sequences
+ that aren't valid UTF-8 sequences, the behaviour is
+ undefined since QString cannot represent those values
+ (data will be lost!)
+ This mode is should not be used in functions where more
+ than one URL component is returned (userInfo() and authority())
+ and it is not allowed in url() and toString().
The values of EncodeReserved and DecodeReserved should not be used together
in one call. The behaviour is undefined if that happens. They are provided
@@ -254,6 +260,15 @@
to reserved characters is different on certain components and specially on
the full URL.
+ The FullyDecoded mode is similar to the behaviour of the functions
+ returning QString in Qt 4.x, including the fact that they will most likely
+ cause data loss if the component in question contains a non-UTF-8
+ percent-encoded sequence. Fortunately, those cases aren't common, so this
+ mode should be used when the component in question is used in a non-URL
+ context. For example, in an FTP client application, the path to the remote
+ file could be stored in a QUrl object, and the string to be transmitted to
+ the FTP server should be obtained using this flag.
+
\sa QUrl::FormattingOptions
*/
@@ -280,17 +295,6 @@ inline static bool isHex(char c)
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f');
}
-static inline char toHex(quint8 c)
-{
- return c > 9 ? c - 10 + 'A' : c + '0';
-}
-
-static inline quint8 fromHex(quint8 c)
-{
- c |= 0x20;
- return c >= 'a' ? c - 'a' + 10 : c - '0';
-}
-
static inline QString ftpScheme()
{
return QStringLiteral("ftp");
@@ -1672,7 +1676,15 @@ void QUrl::setAuthority(const QString &authority, ParsingMode mode)
Returns the authority of the URL if it is defined; otherwise
an empty string is returned.
- \sa setAuthority()
+ The \a options argument controls how to format the authority portion of the
+ URL. The value of QUrl::FullyDecoded should be avoided, since it may
+ produce an ambiguous return value (for example, if the username contains a
+ colon ':' or either the username or password contain an at-sign '@'). In
+ all other cases, this function returns an unambiguous value, which may
+ contain those characters still percent-encoded, plus some control
+ sequences not representable in decoded form in QString.
+
+ \sa setAuthority(), userInfo(), userName(), password(), host(), port()
*/
QString QUrl::authority(ComponentFormattingOptions options) const
{
@@ -1725,6 +1737,15 @@ void QUrl::setUserInfo(const QString &userInfo, ParsingMode mode)
/*!
Returns the user info of the URL, or an empty string if the user
info is undefined.
+
+ The \a options argument controls how to format the user info component. The
+ value of QUrl::FullyDecoded should be avoided, since it may produce an
+ ambiguous return value (for example, if the username contains a colon ':').
+ In all other cases, this function returns an unambiguous value, which may
+ contain that characters still percent-encoded, plus some control sequences
+ not representable in decoded form in QString.
+
+ \sa setUserInfo(), userName(), password(), authority()
*/
QString QUrl::userInfo(ComponentFormattingOptions options) const
{
@@ -1775,7 +1796,18 @@ void QUrl::setUserName(const QString &userName, ParsingMode mode)
Returns the user name of the URL if it is defined; otherwise
an empty string is returned.
- \sa setUserName(), encodedUserName()
+ The \a options argument controls how to format the user name component. All
+ values produce an unambiguous result. With QUrl::FullyDecoded, all
+ percent-encoded sequences are decoded; otherwise, the returned value may
+ contain some percent-encoded sequences for some control sequences not
+ representable in decoded form in QString.
+
+ Note that QUrl::FullyDecoded may cause data loss if those non-representable
+ sequences are present. It is recommended to use that value when the result
+ will be used in a non-URL context, such as setting in QAuthenticator or
+ negotiating a login.
+
+ \sa setUserName(), userInfo()
*/
QString QUrl::userName(ComponentFormattingOptions options) const
{
@@ -1825,6 +1857,17 @@ void QUrl::setPassword(const QString &password, ParsingMode mode)
Returns the password of the URL if it is defined; otherwise
an empty string is returned.
+ The \a options argument controls how to format the user name component. All
+ values produce an unambiguous result. With QUrl::FullyDecoded, all
+ percent-encoded sequences are decoded; otherwise, the returned value may
+ contain some percent-encoded sequences for some control sequences not
+ representable in decoded form in QString.
+
+ Note that QUrl::FullyDecoded may cause data loss if those non-representable
+ sequences are present. It is recommended to use that value when the result
+ will be used in a non-URL context, such as setting in QAuthenticator or
+ negotiating a login.
+
\sa setPassword()
*/
QString QUrl::password(ComponentFormattingOptions options) const
@@ -1887,6 +1930,20 @@ void QUrl::setHost(const QString &host, ParsingMode mode)
/*!
Returns the host of the URL if it is defined; otherwise
an empty string is returned.
+
+ The \a options argument controls how the hostname will be formatted. The
+ QUrl::EncodeUnicode option will cause this function to return the hostname
+ in the ASCII-Compatible Encoding (ACE) form, which is suitable for use in
+ channels that are not 8-bit clean or that require the legacy hostname (such
+ as DNS requests or in HTTP request headers). If that flag is not present,
+ this function returns the International Domain Name (IDN) in Unicode form,
+ according to the list of permissible top-level domains (see
+ idnWhiteList()).
+
+ All other flags are ignored. Host names cannot contain control or percent
+ characters, so the returned value can be considered fully decoded.
+
+ \sa setHost(), idnWhiteList(), setIdnWhiteList(), authority()
*/
QString QUrl::host(ComponentFormattingOptions options) const
{
@@ -1983,6 +2040,16 @@ void QUrl::setPath(const QString &path, ParsingMode mode)
/*!
Returns the path of the URL.
+ The \a options argument controls how to format the path component. All
+ values produce an unambiguous result. With QUrl::FullyDecoded, all
+ percent-encoded sequences are decoded; otherwise, the returned value may
+ contain some percent-encoded sequences for some control sequences not
+ representable in decoded form in QString.
+
+ Note that QUrl::FullyDecoded may cause data loss if those non-representable
+ sequences are present. It is recommended to use that value when the result
+ will be used in a non-URL context, such as sending to an FTP server.
+
\sa setPath()
*/
QString QUrl::path(ComponentFormattingOptions options) const
@@ -1999,7 +2066,7 @@ QString QUrl::path(ComponentFormattingOptions options) const
Returns true if this URL contains a Query (i.e., if ? was seen on it).
- \sa hasQueryItem(), encodedQuery()
+ \sa setQuery(), query(), hasFragment()
*/
bool QUrl::hasQuery() const
{
@@ -2028,7 +2095,15 @@ bool QUrl::hasQuery() const
In DecodedMode, '%' stand for themselves and encoded characters are not
possible.
- \sa encodedQuery(), hasQuery()
+ Query strings often contain percent-encoded sequences, so use of
+ DecodedMode is discouraged. One special sequence to be aware of is that of
+ the plus character ('+'). QUrl does not convert spaces to plus characters,
+ even though HTML forms posted by web browsers do. In order to represent an
+ actual plus character in a query, the sequence "%2B" is usually used. This
+ function will leave "%2B" sequences untouched in TolerantMode or
+ StrictMode.
+
+ \sa query(), hasQuery()
*/
void QUrl::setQuery(const QString &query, ParsingMode mode)
{
@@ -2045,6 +2120,17 @@ void QUrl::setQuery(const QString &query, ParsingMode mode)
d->sectionIsPresent &= ~QUrlPrivate::Query;
}
+/*!
+ \overload
+ \since 5.0
+ Sets the query string of the URL to \a query.
+
+ This function reconstructs the query string from the QUrlQuery object and
+ sets on this QUrl object. This function does not have parsing parameters
+ because the QUrlQuery contains data that is already parsed.
+
+ \sa query(), hasQuery()
+*/
void QUrl::setQuery(const QUrlQuery &query)
{
detach();
@@ -2058,7 +2144,21 @@ void QUrl::setQuery(const QUrlQuery &query)
}
/*!
- Returns the query string of the URL in percent encoded form.
+ Returns the query string of the URL if there's a query string, or an empty
+ result if not. To determine if the parsed URL contained a query string, use
+ hasQuery().
+
+ The \a options argument controls how to format the query component. All
+ values produce an unambiguous result. With QUrl::FullyDecoded, all
+ percent-encoded sequences are decoded; otherwise, the returned value may
+ contain some percent-encoded sequences for some control sequences not
+ representable in decoded form in QString.
+
+ Note that use of QUrl::FullyDecoded in queries is discouraged, as queries
+ often contain data that is supposed to remain percent-encoded, including
+ the use of the "%2B" sequence to represent a plus character ('+').
+
+ \sa setQuery(), hasQuery()
*/
QString QUrl::query(ComponentFormattingOptions options) const
{
@@ -2116,9 +2216,20 @@ void QUrl::setFragment(const QString &fragment, ParsingMode mode)
}
/*!
- Returns the fragment of the URL.
+ Returns the fragment of the URL. To determine if the parsed URL contained a
+ fragment, use hasFragment().
- \sa setFragment()
+ The \a options argument controls how to format the fragment component. All
+ values produce an unambiguous result. With QUrl::FullyDecoded, all
+ percent-encoded sequences are decoded; otherwise, the returned value may
+ contain some percent-encoded sequences for some control sequences not
+ representable in decoded form in QString.
+
+ Note that QUrl::FullyDecoded may cause data loss if those non-representable
+ sequences are present. It is recommended to use that value when the result
+ will be used in a non-URL context.
+
+ \sa setFragment(), hasFragment()
*/
QString QUrl::fragment(ComponentFormattingOptions options) const
{
@@ -2258,8 +2369,9 @@ bool QUrl::isRelative() const
}
/*!
- Returns a string representation of the URL.
- The output can be customized by passing flags with \a options.
+ Returns a string representation of the URL. The output can be customized by
+ passing flags with \a options. The option QUrl::FullyDecoded is not
+ permitted in this function since it would generate ambiguous data.
The resulting QString can be passed back to a QUrl later on.
@@ -2273,8 +2385,9 @@ QString QUrl::url(FormattingOptions options) const
}
/*!
- Returns a string representation of the URL.
- The output can be customized by passing flags with \a options.
+ Returns a string representation of the URL. The output can be customized by
+ passing flags with \a options. The option QUrl::FullyDecoded is not
+ permitted in this function since it would generate ambiguous data.
The default formatting option is \l{QUrl::FormattingOptions}{PrettyDecoded}.
@@ -2283,6 +2396,10 @@ QString QUrl::url(FormattingOptions options) const
QString QUrl::toString(FormattingOptions options) const
{
if (!d) return QString();
+ if (options == QUrl::FullyDecoded) {
+ qWarning("QUrl: QUrl::FullyDecoded is not permitted when reconstructing the full URL");
+ options = QUrl::PrettyDecoded;
+ }
// return just the path if:
// - QUrl::PreferLocalFile is passed
@@ -2690,7 +2807,7 @@ QString QUrl::toLocalFile() const
return QString();
QString tmp;
- QString ourPath = path(QUrl::MostDecoded);
+ QString ourPath = path(QUrl::FullyDecoded);
// magic for shared drive on windows
if (!d->host.isEmpty()) {
@@ -2704,21 +2821,6 @@ QString QUrl::toLocalFile() const
tmp.remove(0, 1);
#endif
}
-
- // check if we need to do one more decoding pass
- int pct = tmp.indexOf(QLatin1Char('%'));
- while (pct != -1) {
- Q_ASSERT(tmp.size() >= pct + 2);
- ushort char1 = tmp.at(pct + 1).unicode();
- ushort char2 = tmp.at(pct + 2).unicode();
-
- Q_ASSERT(isHex(char1) && char1 < 0x80u);
- Q_ASSERT(isHex(char2) && char2 < 0x80u);
- tmp.replace(pct, 3, QChar(fromHex(char1) << 4 | fromHex(char2)));
-
- // next iteration
- pct = tmp.indexOf(QLatin1Char('%'), pct + 1);
- }
return tmp;
}
diff --git a/src/corelib/io/qurl.h b/src/corelib/io/qurl.h
index 41e6c17fd5..90b29ed958 100644
--- a/src/corelib/io/qurl.h
+++ b/src/corelib/io/qurl.h
@@ -145,9 +145,10 @@ public:
EncodeDelimiters = 0x400000 | 0x800000,
EncodeReserved = 0x1000000,
DecodeReserved = 0x2000000,
+ // 0x4000000 used to indicate full-decode mode
FullyEncoded = EncodeSpaces | EncodeUnicode | EncodeDelimiters | EncodeReserved,
- MostDecoded = PrettyDecoded | DecodeReserved
+ FullyDecoded = FullyEncoded | DecodeReserved | 0x4000000
};
Q_DECLARE_FLAGS(ComponentFormattingOptions, ComponentFormattingOption)
#ifdef qdoc
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 3d985dbdc0..12d23e9450 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -560,6 +560,43 @@ non_trivial:
return 0;
}
+static int decode(QString &appendTo, const ushort *begin, const ushort *end)
+{
+ const int origSize = appendTo.size();
+ const ushort *input = begin;
+ ushort *output = 0;
+ while (input != end) {
+ if (*input != '%') {
+ if (output)
+ *output++ = *input;
+ ++input;
+ continue;
+ }
+
+ if (Q_UNLIKELY(!output)) {
+ // detach
+ appendTo.resize(origSize + (end - begin));
+ output = reinterpret_cast<ushort *>(appendTo.begin()) + origSize;
+ memcpy(output, begin, (input - begin) * sizeof(ushort));
+ output += input - begin;
+ }
+
+ ++input;
+ Q_ASSERT(input <= end - 2); // we need two characters
+ Q_ASSERT(isHex(input[0]));
+ Q_ASSERT(isHex(input[1]));
+ *output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]);
+ input += 2;
+ }
+
+ if (output) {
+ int len = output - reinterpret_cast<ushort *>(appendTo.begin());
+ appendTo.truncate(len);
+ return len - origSize;
+ }
+ return 0;
+}
+
template <size_t N>
static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
{
@@ -583,6 +620,12 @@ static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
\li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
\li QUrl::EncodeUnicode: if set, characters above U+0080 will be encoded to their UTF-8
percent-encoded form; if unset, they will be decoded to UTF-16
+ \li QUrl::FullyDecoded: if set, this function will decode all percent-encoded sequences,
+ including that of the percent character. The resulting string
+ will not be percent-encoded anymore. Use with caution!
+ In this mode, the behaviour is undefined if the input string
+ contains any percent-encoding sequences above %80.
+ Also, the function will not correct bad % sequences.
\endlist
Other flags are ignored (including QUrl::EncodeReserved).
@@ -599,6 +642,10 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
{
uchar actionTable[sizeof defaultActionTable];
+ if (encoding == QUrl::FullyDecoded) {
+ return decode(appendTo, reinterpret_cast<const ushort *>(begin), reinterpret_cast<const ushort *>(end));
+ }
+
if (!(encoding & QUrl::EncodeDelimiters) && encoding & QUrl::DecodeReserved) {
// reset the table
memset(actionTable, DecodeCharacter, sizeof actionTable);