From 655e5c958458cc792e8cc399b8b74139c50fa054 Mon Sep 17 00:00:00 2001 From: Joel Hockey Date: Sun, 16 Aug 2020 17:19:35 -0700 Subject: [Backport] CVE-2021-3517: libxml2: Heap-based buffer overflow in xmlEncodeEntitiesInternal() in entities.c Manual cherry-pick of patch originally committed as https://gitlab.gnome.org/GNOME/libxml2/-/commit/bf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2: Validate UTF8 in xmlEncodeEntities Code is currently assuming UTF-8 without validating. Truncated UTF-8 input can cause out-of-bounds array access. Adds further checks to partial fix in 50f06b3e. Fixes #178 Change-Id: Idc0134c16b449e7bd8d5e76ae16c9e92798b5f37 Reviewed-by: Allan Sandfeld Jensen --- chromium/third_party/libxml/src/entities.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/chromium/third_party/libxml/src/entities.c b/chromium/third_party/libxml/src/entities.c index e4a09d62985..dc940ff1b8a 100644 --- a/chromium/third_party/libxml/src/entities.c +++ b/chromium/third_party/libxml/src/entities.c @@ -667,11 +667,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { } else { /* * We assume we have UTF-8 input. + * It must match either: + * 110xxxxx 10xxxxxx + * 1110xxxx 10xxxxxx 10xxxxxx + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * That is: + * cur[0] is 11xxxxxx + * cur[1] is 10xxxxxx + * cur[2] is 10xxxxxx if cur[0] is 111xxxxx + * cur[3] is 10xxxxxx if cur[0] is 1111xxxx + * cur[0] is not 11111xxx */ char buf[11], *ptr; int val = 0, l = 1; - if (*cur < 0xC0) { + if (((cur[0] & 0xC0) != 0xC0) || + ((cur[1] & 0xC0) != 0x80) || + (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) || + (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) || + (((cur[0] & 0xF8) == 0xF8))) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) -- cgit v1.2.3