diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-06-13 13:16:48 +0200 |
---|---|---|
committer | Alexandru Croitor <alexandru.croitor@qt.io> | 2017-07-07 09:15:48 +0000 |
commit | a8e8840888aa122155bc598ba02b62f59f9b37ff (patch) | |
tree | ef7fb8569219a4a766db49f84146a6bcca0d876f | |
parent | 7976efd28ca37c12a4d65f992deff9bc573f3008 (diff) |
[Backport] Fix for CVE-2016-5078
M59 Merge of 'Improve canonicalization of mailto url path components'
The canonicalization of the path component of mailto urls is too lax,
leading to information disclosure and possible command injection attacks
against mail clients. To fix this, we percent-encode more characters in
the path component of mailto urls, matching other Firefox/IE/Edge.
The original land of this patch (via 2817213002) omitted an update to
layout tests.
BUG=711020
TEST=url_unittests,run-webkit-tests fast/url
Change-Id: I23041272ae8e02e4aff17f889636ae88b9a1cb2c
Review-Url: https://codereview.chromium.org/2820373002
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- | chromium/url/url_canon_mailtourl.cc | 21 | ||||
-rw-r--r-- | chromium/url/url_canon_unittest.cc | 59 |
2 files changed, 64 insertions, 16 deletions
diff --git a/chromium/url/url_canon_mailtourl.cc b/chromium/url/url_canon_mailtourl.cc index fb6bc9ab7e7..8a7ff1ae6b7 100644 --- a/chromium/url/url_canon_mailtourl.cc +++ b/chromium/url/url_canon_mailtourl.cc @@ -13,6 +13,23 @@ namespace url { namespace { +// Certain characters should be percent-encoded when they appear in the path +// component of a mailto URL, to improve compatibility and mitigate against +// command-injection attacks on mailto handlers. See https://crbug.com/711020. +template <typename UCHAR> +bool ShouldEncodeMailboxCharacter(UCHAR uch) { + if (uch < 0x21 || // space & control characters. + uch > 0x7e || // high-ascii characters. + uch == 0x22 || // quote. + uch == 0x3c || uch == 0x3e || // angle brackets. + uch == 0x60 || // backtick. + uch == 0x7b || uch == 0x7c || uch == 0x7d // braces and pipe. + ) { + return true; + } + return false; +} + template <typename CHAR, typename UCHAR> bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, const Parsed& parsed, @@ -38,12 +55,12 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, new_parsed->path.begin = output->length(); // Copy the path using path URL's more lax escaping rules. - // We convert to UTF-8 and escape non-ASCII, but leave all + // We convert to UTF-8 and escape non-ASCII, but leave most // ASCII characters alone. int end = parsed.path.end(); for (int i = parsed.path.begin; i < end; ++i) { UCHAR uch = static_cast<UCHAR>(source.path[i]); - if (uch < 0x20 || uch >= 0x80) + if (ShouldEncodeMailboxCharacter<UCHAR>(uch)) success &= AppendUTF8EscapedChar(source.path, &i, end, output); else output->push_back(static_cast<char>(uch)); diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc index 82edd0e6221..30159579826 100644 --- a/chromium/url/url_canon_unittest.cc +++ b/chromium/url/url_canon_unittest.cc @@ -1788,20 +1788,51 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) { Component expected_path; Component expected_query; } cases[] = { - {"mailto:addr1", "mailto:addr1", true, Component(7, 5), Component()}, - {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, Component(7, 13), Component()}, + // Null character should be escaped to %00. + // Keep this test first in the list as it is handled specially below. + {"mailto:addr1\0addr2?foo", + "mailto:addr1%00addr2?foo", + true, Component(7, 13), Component(21, 3)}, + {"mailto:addr1", + "mailto:addr1", + true, Component(7, 5), Component()}, + {"mailto:addr1@foo.com", + "mailto:addr1@foo.com", + true, Component(7, 13), Component()}, // Trailing whitespace is stripped. - {"MaIlTo:addr1 \t ", "mailto:addr1", true, Component(7, 5), Component()}, - {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, Component(7, 5), Component(13,6)}, - {"mailto:addr1,addr2", "mailto:addr1,addr2", true, Component(7, 11), Component()}, - {"mailto:addr1, addr2", "mailto:addr1, addr2", true, Component(7, 12), Component()}, - {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, Component(7, 13), Component()}, - {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, Component(7, 12), Component()}, - // Null character should be escaped to %00 - {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, Component(7, 13), Component(21, 3)}, + {"MaIlTo:addr1 \t ", + "mailto:addr1", + true, Component(7, 5), Component()}, + {"MaIlTo:addr1?to=jon", + "mailto:addr1?to=jon", + true, Component(7, 5), Component(13,6)}, + {"mailto:addr1,addr2", + "mailto:addr1,addr2", + true, Component(7, 11), Component()}, + // Embedded spaces must be encoded. + {"mailto:addr1, addr2", + "mailto:addr1,%20addr2", + true, Component(7, 14), Component()}, + {"mailto:addr1, addr2?subject=one two ", + "mailto:addr1,%20addr2?subject=one%20two", + true, Component(7, 14), Component(22, 17)}, + {"mailto:addr1%2caddr2", + "mailto:addr1%2caddr2", + true, Component(7, 13), Component()}, + {"mailto:\xF0\x90\x8C\x80", + "mailto:%F0%90%8C%80", + true, Component(7, 12), Component()}, // Invalid -- UTF-8 encoded surrogate value. - {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, Component(7, 9), Component()}, - {"mailto:addr1?", "mailto:addr1?", true, Component(7, 5), Component(13, 0)}, + {"mailto:\xed\xa0\x80", + "mailto:%EF%BF%BD", + false, Component(7, 9), Component()}, + {"mailto:addr1?", + "mailto:addr1?", + true, Component(7, 5), Component(13, 0)}, + // Certain characters have special meanings and must be encoded. + {"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~", + "mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~", + true, Component(7, 53), Component(61, 47)}, }; // Define outside of loop to catch bugs where components aren't reset @@ -1810,8 +1841,8 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) { for (size_t i = 0; i < arraysize(cases); i++) { int url_len = static_cast<int>(strlen(cases[i].input)); - if (i == 8) { - // The 9th test case purposely has a '\0' in it -- don't count it + if (i == 0) { + // The first test case purposely has a '\0' in it -- don't count it // as the string terminator. url_len = 22; } |