[Backport] Fix for CVE-2016-5078

M59 Merge of 'Improve canonicalization of mailto url path components' The canonicalization of the path component of mailto urls is too lax, leading to information disclosure and possible command injection attacks against mail clients. To fix this, we percent-encode more characters in the path component of mailto urls, matching other Firefox/IE/Edge. The original land of this patch (via 2817213002) omitted an update to layout tests. BUG=711020 TEST=url_unittests,run-webkit-tests fast/url Change-Id: I23041272ae8e02e4aff17f889636ae88b9a1cb2c Review-Url: https://codereview.chromium.org/2820373002 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-06-13 13:16:48 +0200
committer: Alexandru Croitor <alexandru.croitor@qt.io> 2017-07-07 09:15:48 +0000
commit: a8e8840888aa122155bc598ba02b62f59f9b37ff (patch)
tree: ef7fb8569219a4a766db49f84146a6bcca0d876f
parent: 7976efd28ca37c12a4d65f992deff9bc573f3008 (diff)
2 files changed, 64 insertions, 16 deletions
diff --git a/chromium/url/url_canon_mailtourl.cc b/chromium/url/url_canon_mailtourl.cc
index fb6bc9ab7e7..8a7ff1ae6b7 100644
--- a/chromium/url/url_canon_mailtourl.cc
+++ b/chromium/url/url_canon_mailtourl.cc
@@ -13,6 +13,23 @@ namespace url {
 
 namespace {
 
+// Certain characters should be percent-encoded when they appear in the path
+// component of a mailto URL, to improve compatibility and mitigate against
+// command-injection attacks on mailto handlers. See https://crbug.com/711020.
+template <typename UCHAR>
+bool ShouldEncodeMailboxCharacter(UCHAR uch) {
+  if (uch < 0x21 ||                              // space & control characters.
+      uch > 0x7e ||                              // high-ascii characters.
+      uch == 0x22 ||                             // quote.
+      uch == 0x3c || uch == 0x3e ||              // angle brackets.
+      uch == 0x60 ||                             // backtick.
+      uch == 0x7b || uch == 0x7c || uch == 0x7d  // braces and pipe.
+      ) {
+    return true;
+  }
+  return false;
+}
+
 template <typename CHAR, typename UCHAR>
 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
                              const Parsed& parsed,
@@ -38,12 +55,12 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
     new_parsed->path.begin = output->length();
 
     // Copy the path using path URL's more lax escaping rules.
-    // We convert to UTF-8 and escape non-ASCII, but leave all
+    // We convert to UTF-8 and escape non-ASCII, but leave most
     // ASCII characters alone.
     int end = parsed.path.end();
     for (int i = parsed.path.begin; i < end; ++i) {
       UCHAR uch = static_cast<UCHAR>(source.path[i]);
-      if (uch < 0x20 || uch >= 0x80)
+      if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
         success &= AppendUTF8EscapedChar(source.path, &i, end, output);
       else
         output->push_back(static_cast<char>(uch));
diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc
index 82edd0e6221..30159579826 100644
--- a/chromium/url/url_canon_unittest.cc
+++ b/chromium/url/url_canon_unittest.cc
@@ -1788,20 +1788,51 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) {
     Component expected_path;
     Component expected_query;
   } cases[] = {
-    {"mailto:addr1", "mailto:addr1", true, Component(7, 5), Component()},
-    {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, Component(7, 13), Component()},
+    // Null character should be escaped to %00.
+    // Keep this test first in the list as it is handled specially below.
+    {"mailto:addr1\0addr2?foo",
+     "mailto:addr1%00addr2?foo",
+     true, Component(7, 13), Component(21, 3)},
+    {"mailto:addr1",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"mailto:addr1@foo.com",
+     "mailto:addr1@foo.com",
+     true, Component(7, 13), Component()},
     // Trailing whitespace is stripped.
-    {"MaIlTo:addr1 \t ", "mailto:addr1", true, Component(7, 5), Component()},
-    {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, Component(7, 5), Component(13,6)},
-    {"mailto:addr1,addr2", "mailto:addr1,addr2", true, Component(7, 11), Component()},
-    {"mailto:addr1, addr2", "mailto:addr1, addr2", true, Component(7, 12), Component()},
-    {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, Component(7, 13), Component()},
-    {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, Component(7, 12), Component()},
-    // Null character should be escaped to %00
-    {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, Component(7, 13), Component(21, 3)},
+    {"MaIlTo:addr1 \t ",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"MaIlTo:addr1?to=jon",
+     "mailto:addr1?to=jon",
+     true, Component(7, 5), Component(13,6)},
+    {"mailto:addr1,addr2",
+     "mailto:addr1,addr2",
+     true, Component(7, 11), Component()},
+    // Embedded spaces must be encoded.
+    {"mailto:addr1, addr2",
+     "mailto:addr1,%20addr2",
+     true, Component(7, 14), Component()},
+    {"mailto:addr1, addr2?subject=one two ",
+     "mailto:addr1,%20addr2?subject=one%20two",
+     true, Component(7, 14), Component(22, 17)},
+    {"mailto:addr1%2caddr2",
+     "mailto:addr1%2caddr2",
+     true, Component(7, 13), Component()},
+    {"mailto:\xF0\x90\x8C\x80",
+     "mailto:%F0%90%8C%80",
+     true, Component(7, 12), Component()},
     // Invalid -- UTF-8 encoded surrogate value.
-    {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, Component(7, 9), Component()},
-    {"mailto:addr1?", "mailto:addr1?", true, Component(7, 5), Component(13, 0)},
+    {"mailto:\xed\xa0\x80",
+     "mailto:%EF%BF%BD",
+     false, Component(7, 9), Component()},
+    {"mailto:addr1?",
+     "mailto:addr1?",
+     true, Component(7, 5), Component(13, 0)},
+    // Certain characters have special meanings and must be encoded.
+    {"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~",
+     "mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~",
+     true, Component(7, 53), Component(61, 47)},
   };
 
   // Define outside of loop to catch bugs where components aren't reset
@@ -1810,8 +1841,8 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) {
 
   for (size_t i = 0; i < arraysize(cases); i++) {
     int url_len = static_cast<int>(strlen(cases[i].input));
-    if (i == 8) {
-      // The 9th test case purposely has a '\0' in it -- don't count it
+    if (i == 0) {
+      // The first test case purposely has a '\0' in it -- don't count it
       // as the string terminator.
       url_len = 22;
     }
author	Allan Sandfeld Jensen <allan.jensen@qt.io>	2017-06-13 13:16:48 +0200
committer	Alexandru Croitor <alexandru.croitor@qt.io>	2017-07-07 09:15:48 +0000
commit	a8e8840888aa122155bc598ba02b62f59f9b37ff (patch)
tree	ef7fb8569219a4a766db49f84146a6bcca0d876f
parent	7976efd28ca37c12a4d65f992deff9bc573f3008 (diff)