From a36adfc73ee0085313712dfe1c8c37454dd9380e Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Sun, 12 Apr 2015 10:56:13 +0200 Subject: Upgrade PCRE to r1546 Thanks to LLVM's libFuzzer a dozen of assorted buffer overflows has been discovered, see [1, 2] [1] http://vcs.pcre.org/viewvc/code/trunk/ChangeLog?view=markup [2] http://blog.llvm.org/2015/04/fuzz-all-clangs.html Change-Id: Ib9fd8dfaee8dc50e1899ebac83a74ac1107a0bd2 Reviewed-by: Konstantin Ritt --- src/3rdparty/pcre/pcre_compile.c | 142 +++++++++++++++++++++++------------ src/3rdparty/pcre/pcre_dfa_exec.c | 5 +- src/3rdparty/pcre/pcre_exec.c | 20 +++-- src/3rdparty/pcre/pcre_jit_compile.c | 8 +- src/3rdparty/pcre/sljit/sljitLir.c | 6 ++ src/3rdparty/pcre/sljit/sljitLir.h | 12 +-- 6 files changed, 126 insertions(+), 67 deletions(-) (limited to 'src/3rdparty/pcre') diff --git a/src/3rdparty/pcre/pcre_compile.c b/src/3rdparty/pcre/pcre_compile.c index 7df7fef4bd..3f8174fbd4 100644 --- a/src/3rdparty/pcre/pcre_compile.c +++ b/src/3rdparty/pcre/pcre_compile.c @@ -866,6 +866,14 @@ static const pcre_uint8 opcode_possessify[] = { }; +/* Structure for mutual recursion detection. */ + +typedef struct recurse_check { + struct recurse_check *prev; + const pcre_uchar *group; +} recurse_check; + + /************************************************* * Find an error text * @@ -1704,6 +1712,7 @@ Arguments: utf TRUE in UTF-8 / UTF-16 / UTF-32 mode atend TRUE if called when the pattern is complete cd the "compile data" structure + recurses chain of recurse_check to catch mutual recursion Returns: the fixed length, or -1 if there is no fixed length, @@ -1713,10 +1722,11 @@ Returns: the fixed length, */ static int -find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd) +find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd, + recurse_check *recurses) { int length = -1; - +recurse_check this_recurse; register int branchlength = 0; register pcre_uchar *cc = code + 1 + LINK_SIZE; @@ -1741,7 +1751,8 @@ for (;;) case OP_ONCE: case OP_ONCE_NC: case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd); + d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd, + recurses); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -1775,7 +1786,15 @@ for (;;) cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1); /* Start subpattern */ do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ if (cc > cs && cc < ce) return -1; /* Recursion */ - d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd); + else /* Check for mutual recursion */ + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; + if (r != NULL) return -1; /* Mutual recursion */ + } + this_recurse.prev = recurses; + this_recurse.group = cs; + d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse); if (d < 0) return d; branchlength += d; cc += 1 + LINK_SIZE; @@ -2362,11 +2381,6 @@ Arguments: Returns: TRUE if what is matched could be empty */ -typedef struct recurse_check { - struct recurse_check *prev; - const pcre_uchar *group; -} recurse_check; - static BOOL could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, BOOL utf, compile_data *cd, recurse_check *recurses) @@ -2497,8 +2511,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL)) - empty_branch = TRUE; + if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, + recurses)) empty_branch = TRUE; code += GET(code, 1); } while (*code == OP_ALT); @@ -3093,7 +3107,7 @@ Returns: TRUE if the auto-possessification is possible static BOOL compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd, - const pcre_uint32 *base_list, const pcre_uchar *base_end) + const pcre_uint32 *base_list, const pcre_uchar *base_end, int *rec_limit) { pcre_uchar c; pcre_uint32 list[8]; @@ -3110,6 +3124,9 @@ pcre_uint32 chr; BOOL accepted, invert_bits; BOOL entered_a_group = FALSE; +if (*rec_limit == 0) return FALSE; +--(*rec_limit); + /* Note: the base_list[1] contains whether the current opcode has greedy (represented by a non-zero value) quantifier. This is a different from other character type lists, which stores here that the character iterator @@ -3180,7 +3197,8 @@ for(;;) while (*next_code == OP_ALT) { - if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE; + if (!compare_opcodes(code, utf, cd, base_list, base_end, rec_limit)) + return FALSE; code = next_code + 1 + LINK_SIZE; next_code += GET(next_code, 1); } @@ -3200,7 +3218,7 @@ for(;;) /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ next_code += 1 + LINK_SIZE; - if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) + if (!compare_opcodes(next_code, utf, cd, base_list, base_end, rec_limit)) return FALSE; code += PRIV(OP_lengths)[c]; @@ -3633,6 +3651,7 @@ register pcre_uchar c; const pcre_uchar *end; pcre_uchar *repeat_opcode; pcre_uint32 list[8]; +int rec_limit; for (;;) { @@ -3653,7 +3672,8 @@ for (;;) get_chr_property_list(code, utf, cd->fcc, list) : NULL; list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; - if (end != NULL && compare_opcodes(end, utf, cd, list, end)) + rec_limit = 1000; + if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit)) { switch(c) { @@ -3709,7 +3729,8 @@ for (;;) list[1] = (c & 1) == 0; - if (compare_opcodes(end, utf, cd, list, end)) + rec_limit = 1000; + if (compare_opcodes(end, utf, cd, list, end, &rec_limit)) { switch (c) { @@ -4002,7 +4023,7 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) /* See if this recursion is on the forward reference list. If so, adjust the reference. */ - for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; + for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; hc += LINK_SIZE) { offset = (int)GET(hc, 0); @@ -4208,7 +4229,11 @@ if ((options & PCRE_CASELESS) != 0) range. Otherwise, use a recursive call to add the additional range. */ else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) end = od; /* Extend upwards */ + else if (od > end && oc <= end + 1) + { + end = od; /* Extend upwards */ + if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); + } else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od); } } @@ -5509,6 +5534,12 @@ for (;; ptr++) } #endif + /* Even though any XCLASS list is now discarded, we must allow for + its memory. */ + + if (lengthptr != NULL) + *lengthptr += (int)(class_uchardata - class_uchardata_base); + /* If there are no characters > 255, or they are all to be included or excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the whole class was negated and whether there were negative specials such as \S @@ -5907,6 +5938,7 @@ for (;; ptr++) { register int i; int len = (int)(code - previous); + size_t base_hwm_offset = save_hwm_offset; pcre_uchar *bralink = NULL; pcre_uchar *brazeroptr = NULL; @@ -6052,21 +6084,21 @@ for (;; ptr++) memcpy(code, previous, IN_UCHARS(len)); while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - save_hwm_offset)) + WORK_SIZE_SAFETY_MARGIN - + (this_hwm_offset - base_hwm_offset)) { *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; } - for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; - hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; + for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset; + hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len); cd->hwm += LINK_SIZE; } - save_hwm_offset = this_hwm_offset; + base_hwm_offset = this_hwm_offset; code += len; } } @@ -6133,21 +6165,21 @@ for (;; ptr++) copying them. */ while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - save_hwm_offset)) + WORK_SIZE_SAFETY_MARGIN - + (this_hwm_offset - base_hwm_offset)) { *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; } - for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; - hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; + for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset; + hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); cd->hwm += LINK_SIZE; } - save_hwm_offset = this_hwm_offset; + base_hwm_offset = this_hwm_offset; code += len; } @@ -6455,15 +6487,25 @@ for (;; ptr++) parenthesis forms. */ case CHAR_LEFT_PARENTHESIS: - newoptions = options; - skipbytes = 0; - bravalue = OP_CBRA; - save_hwm_offset = cd->hwm - cd->start_workspace; - reset_bracount = FALSE; + ptr++; - /* First deal with various "verbs" that can be introduced by '*'. */ + /* First deal with comments. Putting this code right at the start ensures + that comments have no bad side effects. */ + + if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) + { + ptr += 2; + while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; + if (*ptr == CHAR_NULL) + { + *errorcodeptr = ERR18; + goto FAILED; + } + continue; + } + + /* Now deal with various "verbs" that can be introduced by '*'. */ - ptr++; if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) { @@ -6584,10 +6626,18 @@ for (;; ptr++) goto FAILED; } + /* Initialize for "real" parentheses */ + + newoptions = options; + skipbytes = 0; + bravalue = OP_CBRA; + save_hwm_offset = cd->hwm - cd->start_workspace; + reset_bracount = FALSE; + /* Deal with the extended parentheses; all are introduced by '?', and the appearance of any of them means that this is not a capturing group. */ - else if (*ptr == CHAR_QUESTION_MARK) + if (*ptr == CHAR_QUESTION_MARK) { int i, set, unset, namelen; int *optset; @@ -6596,17 +6646,6 @@ for (;; ptr++) switch (*(++ptr)) { - case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ - ptr++; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr == CHAR_NULL) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - - /* ------------------------------------------------------------ */ case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ reset_bracount = TRUE; @@ -6655,7 +6694,9 @@ for (;; ptr++) if (tempptr[1] == CHAR_QUESTION_MARK && (tempptr[2] == CHAR_EQUALS_SIGN || tempptr[2] == CHAR_EXCLAMATION_MARK || - tempptr[2] == CHAR_LESS_THAN_SIGN)) + (tempptr[2] == CHAR_LESS_THAN_SIGN && + (tempptr[3] == CHAR_EQUALS_SIGN || + tempptr[3] == CHAR_EXCLAMATION_MARK)))) { cd->iscondassert = TRUE; break; @@ -8264,7 +8305,7 @@ for (;;) int fixed_length; *code = OP_END; fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0, - FALSE, cd); + FALSE, cd, NULL); DPRINTF(("fixed length = %d\n", fixed_length)); if (fixed_length == -3) { @@ -8549,6 +8590,7 @@ do { case OP_RREF: case OP_DNRREF: case OP_DEF: + case OP_FAIL: return FALSE; default: /* Assertion */ @@ -9366,7 +9408,7 @@ if (cd->check_lookbehind) int end_op = *be; *be = OP_END; fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE, - cd); + cd, NULL); *be = end_op; DPRINTF(("fixed length = %d\n", fixed_length)); if (fixed_length < 0) diff --git a/src/3rdparty/pcre/pcre_dfa_exec.c b/src/3rdparty/pcre/pcre_dfa_exec.c index 64a495cc04..2d99787631 100644 --- a/src/3rdparty/pcre/pcre_dfa_exec.c +++ b/src/3rdparty/pcre/pcre_dfa_exec.c @@ -2736,9 +2736,10 @@ for (;;) condcode == OP_DNRREF) return PCRE_ERROR_DFA_UCOND; - /* The DEFINE condition is always false */ + /* The DEFINE condition is always false, and the assertion (?!) is + converted to OP_FAIL. */ - if (condcode == OP_DEF) + if (condcode == OP_DEF || condcode == OP_FAIL) { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } /* The only supported version of OP_RREF is for the value RREF_ANY, diff --git a/src/3rdparty/pcre/pcre_exec.c b/src/3rdparty/pcre/pcre_exec.c index 75fa2a7329..b639a28b69 100644 --- a/src/3rdparty/pcre/pcre_exec.c +++ b/src/3rdparty/pcre/pcre_exec.c @@ -1376,6 +1376,7 @@ for (;;) break; case OP_DEF: /* DEFINE - always false */ + case OP_FAIL: /* From optimized (?!) condition */ break; /* The condition is an assertion. Call match() to evaluate it - setting @@ -3482,7 +3483,7 @@ for (;;) if (possessive) continue; /* No backtracking */ for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); if (rrc != MATCH_NOMATCH) RRETURN(rrc); #ifdef SUPPORT_UCP @@ -3903,7 +3904,7 @@ for (;;) if (possessive) continue; /* No backtracking */ for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -4038,7 +4039,7 @@ for (;;) if (possessive) continue; /* No backtracking */ for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM34); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -5609,7 +5610,7 @@ for (;;) if (possessive) continue; /* No backtracking */ for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -5651,12 +5652,17 @@ for (;;) if (possessive) continue; /* No backtracking */ + /* We use <= pp rather than == pp to detect the start of the run while + backtracking because the use of \C in UTF mode can cause BACKCHAR to + move back past pp. This is just palliative; the use of \C in UTF mode + is fraught with danger. */ + for(;;) { int lgb, rgb; PCRE_PUCHAR fptr; - if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ + if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */ RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -5674,7 +5680,7 @@ for (;;) for (;;) { - if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ + if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */ fptr = eptr - 1; if (!utf) c = *fptr; else { @@ -5924,7 +5930,7 @@ for (;;) if (possessive) continue; /* No backtracking */ for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM46); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; diff --git a/src/3rdparty/pcre/pcre_jit_compile.c b/src/3rdparty/pcre/pcre_jit_compile.c index 795a5d2b47..0901c1bd53 100644 --- a/src/3rdparty/pcre/pcre_jit_compile.c +++ b/src/3rdparty/pcre/pcre_jit_compile.c @@ -2108,7 +2108,7 @@ sljit_uw *result; if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; -result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), common->allocator_data); +result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data); if (SLJIT_UNLIKELY(result == NULL)) { sljit_set_compiler_memory_error(compiler); @@ -6997,7 +6997,7 @@ cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) - has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE; + has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) opcode = OP_SCOND; @@ -7255,12 +7255,14 @@ if (opcode == OP_COND || opcode == OP_SCOND) add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); matchingpath += 1 + 2 * IMM2_SIZE; } - else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) + else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) { /* Never has other case. */ BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; SLJIT_ASSERT(!has_alternatives); + if (*matchingpath == OP_FAIL) + stacksize = 0; if (*matchingpath == OP_RREF) { stacksize = GET2(matchingpath, 1); diff --git a/src/3rdparty/pcre/sljit/sljitLir.c b/src/3rdparty/pcre/sljit/sljitLir.c index 8112e8da6e..5039a7e04e 100644 --- a/src/3rdparty/pcre/sljit/sljitLir.c +++ b/src/3rdparty/pcre/sljit/sljitLir.c @@ -435,6 +435,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile SLJIT_FREE(compiler, allocator_data); } +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + if (compiler->error == SLJIT_SUCCESS) + compiler->error = SLJIT_ERR_ALLOC_FAILED; +} + #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) { diff --git a/src/3rdparty/pcre/sljit/sljitLir.h b/src/3rdparty/pcre/sljit/sljitLir.h index 79f1d102c1..24c0f60399 100644 --- a/src/3rdparty/pcre/sljit/sljitLir.h +++ b/src/3rdparty/pcre/sljit/sljitLir.h @@ -429,11 +429,13 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile these checks increases the performance of the compiling process. */ static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } -/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED. After - the error code is set, the compiler behaves as if itself detected - an allocation failure. This can greatly simplify error management, - since only the compiler needs to be checked after compilation. */ -static SLJIT_INLINE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) { compiler->error = SLJIT_ERR_ALLOC_FAILED; } +/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except + if an error was detected before. After the error code is set + the compiler behaves as if the allocation failure happened + during an sljit function call. This can greatly simplify error + checking, since only the compiler status needs to be checked + after the compilation. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); /* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, -- cgit v1.2.3