From a2f63dfd7ac0e122c62cba5106496e147b912941 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 26 Sep 2012 12:23:21 +0200 Subject: Use the new QUrlPrivate::validateComponent validator in the main parser The code was copied from the main parser there, so remove the duplication. Change-Id: I85748f6f76b3097ff22958d9de67cfa27061a72b Reviewed-by: David Faure (KDE) --- src/corelib/io/qurl.cpp | 65 +++++++++---------------------------------------- 1 file changed, 11 insertions(+), 54 deletions(-) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index a4e1c1e3e3..494caf60d5 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -1315,61 +1315,18 @@ inline void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode if (error || parsingMode == QUrl::TolerantMode) return; - // The parsing so far was tolerant of errors, so the StrictMode - // parsing is actually implemented here, as an extra post-check. - // We only execute it if we haven't found any errors so far. + // The parsing so far was partially tolerant of errors, except for the + // scheme parser (which is always strict) and the authority (which was + // executed in strict mode). + // If we haven't found any errors so far, continue the strict-mode parsing + // from the path component onwards. - // What we need to look out for, that the regular parser tolerates: - // - percent signs not followed by two hex digits - // - forbidden characters, which should always appear encoded - // '"' / '<' / '>' / '\' / '^' / '`' / '{' / '|' / '}' / BKSP - // control characters - // - delimiters not allowed in certain positions - // . scheme: parser is already strict - // . user info: gen-delims (except for ':') disallowed - // . host: parser is stricter than the standard - // . port: parser is stricter than the standard - // . path: all delimiters allowed - // . fragment: all delimiters allowed - // . query: all delimiters allowed - // We would only need to check the user-info. However, the presence - // of the disallowed gen-delims changes the parsing, so we don't - // actually need to do anything - static const char forbidden[] = "\"<>\\^`{|}\x7F"; - for (uint i = 0; i < uint(len); ++i) { - register uint uc = data[i]; - if (uc >= 0x80) - continue; - - if ((uc == '%' && (uint(len) < i + 2 || !isHex(data[i + 1]) || !isHex(data[i + 2]))) - || uc <= 0x20 || strchr(forbidden, uc)) { - // found an error - ErrorCode errorCode; - - // where are we? - if (i > uint(hash)) { - errorCode = InvalidFragmentError; - } else if (i > uint(question)) { - errorCode = InvalidQueryError; - } else if (i > uint(pathStart)) { - // pathStart is never -1 - errorCode = InvalidPathError; - } else { - // It must be in the authority, since the scheme is strict. - // Since the port and hostname parsers are also strict, - // the error can only have happened in the user info. - int pos = url.indexOf(QLatin1Char(':'), hierStart); - if (i > uint(pos)) { - errorCode = InvalidPasswordError; - } else { - errorCode = InvalidUserNameError; - } - } - - setError(errorCode, url, i); - return; - } - } + if (!validateComponent(Path, url, pathStart, hierEnd)) + return; + if (uint(question) < uint(hash) && !validateComponent(Query, url, question + 1, qMin(hash, len))) + return; + if (hash != -1) + validateComponent(Fragment, url, hash + 1, len); } /* -- cgit v1.2.3