diff options
author | Charlie Harrison <csharrison@chromium.org> | 2019-04-05 13:30:53 +0000 |
---|---|---|
committer | Michael BrĂ¼ning <michael.bruning@qt.io> | 2019-12-04 21:04:56 +0000 |
commit | 9ccd70b1edbe5226ef6a7f911c560a85d52a3bea (patch) | |
tree | 546cf88a5217492c2c21e903a71ccd7b0e9ac431 | |
parent | e274a456ab5109712f6ba91e3b9410ab22a439fa (diff) |
[Backport] CVE-2019-5839
Make path URL parsing more lax
Parsing the path component of a non-special URL like javascript or data
should not fail for invalid URL characters like \uFFFF. See this bit
in the spec:
https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
Note: some failing WPTs are added which are because url parsing
replaces invalid characters (e.g. \uFFFF) with the replacement char
\uFFFD, when that isn't in the spec.
Bug: 925614
Change-Id: Iad9ef7456ddb4d86b1d8d995e2d48fee9483864e
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- | chromium/url/url_canon_pathurl.cc | 23 | ||||
-rw-r--r-- | chromium/url/url_canon_unittest.cc | 10 |
2 files changed, 19 insertions, 14 deletions
diff --git a/chromium/url/url_canon_pathurl.cc b/chromium/url/url_canon_pathurl.cc index 494fbdafb16..36fbce8ee39 100644 --- a/chromium/url/url_canon_pathurl.cc +++ b/chromium/url/url_canon_pathurl.cc @@ -16,13 +16,12 @@ namespace { // Canonicalize the given |component| from |source| into |output| and // |new_component|. If |separator| is non-zero, it is pre-pended to |output| // prior to the canonicalized component; i.e. for the '?' or '#' characters. -template<typename CHAR, typename UCHAR> -bool DoCanonicalizePathComponent(const CHAR* source, +template <typename CHAR, typename UCHAR> +void DoCanonicalizePathComponent(const CHAR* source, const Component& component, char separator, CanonOutput* output, Component* new_component) { - bool success = true; if (component.is_valid()) { if (separator) output->push_back(separator); @@ -34,7 +33,7 @@ bool DoCanonicalizePathComponent(const CHAR* source, for (int i = component.begin; i < end; i++) { UCHAR uch = static_cast<UCHAR>(source[i]); if (uch < 0x20 || uch >= 0x80) - success &= AppendUTF8EscapedChar(source, &i, end, output); + AppendUTF8EscapedChar(source, &i, end, output); else output->push_back(static_cast<char>(uch)); } @@ -43,7 +42,6 @@ bool DoCanonicalizePathComponent(const CHAR* source, // Empty part. new_component->reset(); } - return success; } template <typename CHAR, typename UCHAR> @@ -63,12 +61,15 @@ bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, new_parsed->port.reset(); // We allow path URLs to have the path, query and fragment components, but we // will canonicalize each of the via the weaker path URL rules. - success &= DoCanonicalizePathComponent<CHAR, UCHAR>( - source.path, parsed.path, '\0', output, &new_parsed->path); - success &= DoCanonicalizePathComponent<CHAR, UCHAR>( - source.query, parsed.query, '?', output, &new_parsed->query); - success &= DoCanonicalizePathComponent<CHAR, UCHAR>( - source.ref, parsed.ref, '#', output, &new_parsed->ref); + // + // Note: parsing the path part should never cause a failure, see + // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state + DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0', + output, &new_parsed->path); + DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?', + output, &new_parsed->query); + DoCanonicalizePathComponent<CHAR, UCHAR>(source.ref, parsed.ref, '#', output, + &new_parsed->ref); return success; } diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc index 9ca51aaaf37..6fa47a230c3 100644 --- a/chromium/url/url_canon_unittest.cc +++ b/chromium/url/url_canon_unittest.cc @@ -1808,9 +1808,13 @@ TEST(URLCanonTest, CanonicalizePathURL) { const char* input; const char* expected; } path_cases[] = { - {"javascript:", "javascript:"}, - {"JavaScript:Foo", "javascript:Foo"}, - {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"}, + {"javascript:", "javascript:"}, + {"JavaScript:Foo", "javascript:Foo"}, + {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"}, + + // Validation errors should not cause failure. See + // https://crbug.com/925614. + {"javascript:\uFFFF", "javascript:%EF%BF%BD"}, }; for (size_t i = 0; i < arraysize(path_cases); i++) { |