Update PCRE to SVN r1622 (8.38 + patches)

In preparation for the 5.6 release.

Upstream changelog:
http://vcs.pcre.org/pcre/code/trunk/ChangeLog?revision=1622&view=markup&pathrev=1622

Upstream changes:
http://vcs.pcre.org/pcre/code/trunk/?pathrev=1622

Change-Id: I97fb23f6b42f4871f05daa726c2edfc691a16d8e
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
author: Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> 2015-12-18 13:19:10 +0100
committer: Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> 2016-01-04 08:56:28 +0000
commit: 3880f41e683f02b905c8cbc3c578c3f3a0a1eb2e (patch)
tree: 8bb684c2847863f60e577f5f95e88e743730c946 /src
parent: b08d84d39e11b6abab4e941050c22f37ca79a83a (diff)
9 files changed, 284 insertions, 157 deletions
diff --git a/src/3rdparty/pcre/pcre.h b/src/3rdparty/pcre/pcre.h
index c2557cf4b1..609deb5be6 100644
--- a/src/3rdparty/pcre/pcre.h
+++ b/src/3rdparty/pcre/pcre.h
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          38
+#define PCRE_MINOR          39
 #define PCRE_PRERELEASE     -RC1
-#define PCRE_DATE           2015-05-03
+#define PCRE_DATE           2015-11-23
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
diff --git a/src/3rdparty/pcre/pcre_compile.c b/src/3rdparty/pcre/pcre_compile.c
index 866aa8c693..b9a239e554 100644
--- a/src/3rdparty/pcre/pcre_compile.c
+++ b/src/3rdparty/pcre/pcre_compile.c
@@ -4639,16 +4639,16 @@ for (;; ptr++)
   /* In the real compile phase, just check the workspace used by the forward
   reference list. */
 
-  else if (cd->hwm > cd->start_workspace + cd->workspace_size -
-           WORK_SIZE_SAFETY_MARGIN)
+  else if (cd->hwm > cd->start_workspace + cd->workspace_size)
     {
     *errorcodeptr = ERR52;
     goto FAILED;
     }
 
-  /* If in \Q...\E, check for the end; if not, we have a literal */
+  /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
+  isolated \E is ignored. */
 
-  if (inescq && c != CHAR_NULL)
+  if (c != CHAR_NULL)
     {
     if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
       {
@@ -4656,7 +4656,7 @@ for (;; ptr++)
       ptr++;
       continue;
       }
-    else
+    else if (inescq)
       {
       if (previous_callout != NULL)
         {
@@ -4671,18 +4671,27 @@ for (;; ptr++)
         }
       goto NORMAL_CHAR;
       }
-    /* Control does not reach here. */
+
+    /* Check for the start of a \Q...\E sequence. We must do this here rather
+    than later in case it is immediately followed by \E, which turns it into a
+    "do nothing" sequence. */
+
+    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
+      {
+      inescq = TRUE;
+      ptr++;
+      continue;
+      }
     }
 
-  /* In extended mode, skip white space and comments. We need a loop in order
-  to check for more white space and more comments after a comment. */
+  /* In extended mode, skip white space and comments. */
 
   if ((options & PCRE_EXTENDED) != 0)
     {
-    for (;;)
+    const pcre_uchar *wscptr = ptr;
+    while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+    if (c == CHAR_NUMBER_SIGN)
       {
-      while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
-      if (c != CHAR_NUMBER_SIGN) break;
       ptr++;
       while (*ptr != CHAR_NULL)
         {
@@ -4696,10 +4705,35 @@ for (;; ptr++)
         if (utf) FORWARDCHAR(ptr);
 #endif
         }
-      c = *ptr;     /* Either NULL or the char after a newline */
+      }
+
+    /* If we skipped any characters, restart the loop. Otherwise, we didn't see
+    a comment. */
+
+    if (ptr > wscptr)
+      {
+      ptr--;
+      continue;
       }
     }
 
+  /* Skip over (?# comments. We need to do this here because we want to know if
+  the next thing is a quantifier, and these comments may come between an item
+  and its quantifier. */
+
+  if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
+      ptr[2] == CHAR_NUMBER_SIGN)
+    {
+    ptr += 3;
+    while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+    if (*ptr == CHAR_NULL)
+      {
+      *errorcodeptr = ERR18;
+      goto FAILED;
+      }
+    continue;
+    }
+
   /* See if the next thing is a quantifier. */
 
   is_quantifier =
@@ -4941,9 +4975,10 @@ for (;; ptr++)
       (which is on the stack). We have to remember that there was XCLASS data,
       however. */
 
+      if (class_uchardata > class_uchardata_base) xclass = TRUE;
+
       if (lengthptr != NULL && class_uchardata > class_uchardata_base)
         {
-        xclass = TRUE;
         *lengthptr += (int)(class_uchardata - class_uchardata_base);
         class_uchardata = class_uchardata_base;
         }
@@ -5046,10 +5081,28 @@ for (;; ptr++)
             ptr = tempptr + 1;
             continue;
 
-            /* For all other POSIX classes, no special action is taken in UCP
-            mode. Fall through to the non_UCP case. */
+            /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
+            to fall through to the non-UCP case and build a bit map for
+            characters with code points less than 256. If we are in a negated
+            POSIX class, characters with code points greater than 255 must
+            either all match or all not match. In the special case where we
+            have not yet generated any xclass data, and this is the final item
+            in the overall class, we need do nothing: later on, the opcode
+            OP_NCLASS will be used to indicate that characters greater than 255
+            are acceptable. If we have already seen an xclass item or one may
+            follow (we have to assume that it might if this is not the end of
+            the class), explicitly list all wide codepoints, which will then
+            either not match or match, depending on whether the class is or is
+            not negated. */
 
             default:
+            if (local_negate &&
+                (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
+              {
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+              }
             break;
             }
           }
@@ -5388,16 +5441,20 @@ for (;; ptr++)
       CLASS_SINGLE_CHARACTER:
       if (class_one_char < 2) class_one_char++;
 
-      /* If class_one_char is 1, we have the first single character in the
-      class, and there have been no prior ranges, or XCLASS items generated by
-      escapes. If this is the final character in the class, we can optimize by
-      turning the item into a 1-character OP_CHAR[I] if it's positive, or
-      OP_NOT[I] if it's negative. In the positive case, it can cause firstchar
-      to be set. Otherwise, there can be no first char if this item is first,
-      whatever repeat count may follow. In the case of reqchar, save the
-      previous value for reinstating. */
+      /* If xclass_has_prop is false and class_one_char is 1, we have the first
+      single character in the class, and there have been no prior ranges, or
+      XCLASS items generated by escapes. If this is the final character in the
+      class, we can optimize by turning the item into a 1-character OP_CHAR[I]
+      if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
+      can cause firstchar to be set. Otherwise, there can be no first char if
+      this item is first, whatever repeat count may follow. In the case of
+      reqchar, save the previous value for reinstating. */
 
-      if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+      if (!inescq &&
+#ifdef SUPPORT_UCP
+          !xclass_has_prop &&
+#endif
+          class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
         {
         ptr++;
         zeroreqchar = reqchar;
@@ -5513,9 +5570,10 @@ for (;; ptr++)
     actual compiled code. */
 
 #ifdef SUPPORT_UTF
-    if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
+    if (xclass && (xclass_has_prop || !should_flip_negation ||
+        (options & PCRE_UCP) != 0))
 #elif !defined COMPILE_PCRE8
-    if (xclass && !should_flip_negation)
+    if (xclass && (xclass_has_prop || !should_flip_negation))
 #endif
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       {
@@ -6508,21 +6566,6 @@ for (;; ptr++)
     case CHAR_LEFT_PARENTHESIS:
     ptr++;
 
-    /* First deal with comments. Putting this code right at the start ensures
-    that comments have no bad side effects. */
-
-    if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
-      {
-      ptr += 2;
-      while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
-      if (*ptr == CHAR_NULL)
-        {
-        *errorcodeptr = ERR18;
-        goto FAILED;
-        }
-      continue;
-      }
-
     /* Now deal with various "verbs" that can be introduced by '*'. */
 
     if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
@@ -6613,9 +6656,17 @@ for (;; ptr++)
               goto FAILED;
               }
             setverb = *code++ = verbs[i].op_arg;
-            *code++ = arglen;
-            memcpy(code, arg, IN_UCHARS(arglen));
-            code += arglen;
+            if (lengthptr != NULL)    /* In pass 1 just add in the length */
+              {                       /* to avoid potential workspace */
+              *lengthptr += arglen;   /* overflow. */
+              *code++ = 0;
+              }
+            else
+              {
+              *code++ = arglen;
+              memcpy(code, arg, IN_UCHARS(arglen));
+              code += arglen;
+              }
             *code++ = 0;
             }
 
@@ -6668,7 +6719,7 @@ for (;; ptr++)
         /* ------------------------------------------------------------ */
         case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
         reset_bracount = TRUE;
-        cd->dupgroups = TRUE;     /* Record (?| encountered */ 
+        cd->dupgroups = TRUE;     /* Record (?| encountered */
         /* Fall through */
 
         /* ------------------------------------------------------------ */
@@ -6769,11 +6820,11 @@ for (;; ptr++)
           {
           while (IS_DIGIT(*ptr))
             {
-            if (recno > INT_MAX / 10 - 1)  /* Integer overflow */              
-              {                                                             
-              while (IS_DIGIT(*ptr)) ptr++;                                 
-              *errorcodeptr = ERR61;                                        
-              goto FAILED; 
+            if (recno > INT_MAX / 10 - 1)  /* Integer overflow */
+              {
+              while (IS_DIGIT(*ptr)) ptr++;
+              *errorcodeptr = ERR61;
+              goto FAILED;
               }
             recno = recno * 10 + (int)(*ptr - CHAR_0);
             ptr++;
@@ -6909,11 +6960,11 @@ for (;; ptr++)
               *errorcodeptr = ERR15;
               goto FAILED;
               }
-            if (recno > INT_MAX / 10 - 1)   /* Integer overflow */          
-              {                                                                
-              *errorcodeptr = ERR61;                                        
-              goto FAILED;                                
-              }   
+            if (recno > INT_MAX / 10 - 1)   /* Integer overflow */
+              {
+              *errorcodeptr = ERR61;
+              goto FAILED;
+              }
             recno = recno * 10 + name[i] - CHAR_0;
             }
           if (recno == 0) recno = RREF_ANY;
@@ -7191,7 +7242,7 @@ for (;; ptr++)
           {
           named_group *ng;
           recno = 0;
-           
+
           if (namelen == 0)
             {
             *errorcodeptr = ERR62;
@@ -7229,24 +7280,24 @@ for (;; ptr++)
           issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
           only mode, we finesse the bug by allowing more memory always. */
 
-          *lengthptr += 2 + 2*LINK_SIZE;
-          
+          *lengthptr += 4 + 4*LINK_SIZE;
+
           /* It is even worse than that. The current reference may be to an
           existing named group with a different number (so apparently not
           recursive) but which later on is also attached to a group with the
-          current number. This can only happen if $(| has been previous 
-          encountered. In that case, we allow yet more memory, just in case. 
+          current number. This can only happen if $(| has been previous
+          encountered. In that case, we allow yet more memory, just in case.
           (Again, this is fixed "properly" in PCRE2. */
-          
+
           if (cd->dupgroups) *lengthptr += 4 + 4*LINK_SIZE;
 
           /* Otherwise, check for recursion here. The name table does not exist
           in the first pass; instead we must scan the list of names encountered
           so far in order to get the number. If the name is not found, leave
           the value of recno as 0 for a forward reference. */
-           
+
           else
-            { 
+            {
             ng = cd->named_groups;
             for (i = 0; i < cd->names_found; i++, ng++)
               {
@@ -7266,7 +7317,7 @@ for (;; ptr++)
                   }
                 }
               }
-            }   
+            }
           }
 
         /* In the real compile, search the name table. We check the name
@@ -7556,39 +7607,15 @@ for (;; ptr++)
         newoptions = (options | set) & (~unset);
 
         /* If the options ended with ')' this is not the start of a nested
-        group with option changes, so the options change at this level. If this
-        item is right at the start of the pattern, the options can be
-        abstracted and made external in the pre-compile phase, and ignored in
-        the compile phase. This can be helpful when matching -- for instance in
-        caseless checking of required bytes.
-
-        If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
-        definitely *not* at the start of the pattern because something has been
-        compiled. In the pre-compile phase, however, the code pointer can have
-        that value after the start, because it gets reset as code is discarded
-        during the pre-compile. However, this can happen only at top level - if
-        we are within parentheses, the starting BRA will still be present. At
-        any parenthesis level, the length value can be used to test if anything
-        has been compiled at that level. Thus, a test for both these conditions
-        is necessary to ensure we correctly detect the start of the pattern in
-        both phases.
-
+        group with option changes, so the options change at this level.
         If we are not at the pattern start, reset the greedy defaults and the
         case value for firstchar and reqchar. */
 
         if (*ptr == CHAR_RIGHT_PARENTHESIS)
           {
-          if (code == cd->start_code + 1 + LINK_SIZE &&
-               (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
-            {
-            cd->external_options = newoptions;
-            }
-          else
-            {
-            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
-            greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
-            }
+          greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
+          greedy_non_default = greedy_default ^ 1;
+          req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
 
           /* Change options at this level, and pass them back for use
           in subsequent branches. */
@@ -7867,16 +7894,6 @@ for (;; ptr++)
       c = ec;
     else
       {
-      if (escape == ESC_Q)            /* Handle start of quoted string */
-        {
-        if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
-          ptr += 2;               /* avoid empty string */
-            else inescq = TRUE;
-        continue;
-        }
-
-      if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */
-
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */
 
@@ -9296,7 +9313,7 @@ if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
 
 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
   (int)(cd->hwm - cworkspace)));
-  
+
 if (length > MAX_PATTERN_SIZE)
   {
   errorcode = ERR20;
@@ -9434,16 +9451,16 @@ if (cd->hwm > cd->start_workspace)
     int offset, recno;
     cd->hwm -= LINK_SIZE;
     offset = GET(cd->hwm, 0);
-    
+
     /* Check that the hwm handling hasn't gone wrong. This whole area is
-    rewritten in PCRE2 because there are some obscure cases. */ 
-     
+    rewritten in PCRE2 because there are some obscure cases. */
+
     if (offset == 0 || codestart[offset-1] != OP_RECURSE)
       {
-      errorcode = ERR10; 
+      errorcode = ERR10;
       break;
-      }  
- 
+      }
+
     recno = GET(codestart, offset);
     if (recno != prev_recno)
       {
diff --git a/src/3rdparty/pcre/pcre_get.c b/src/3rdparty/pcre/pcre_get.c
index 8094b34bbf..cdd2abc80f 100644
--- a/src/3rdparty/pcre/pcre_get.c
+++ b/src/3rdparty/pcre/pcre_get.c
@@ -250,6 +250,7 @@ Arguments:
   code         the compiled regex
   stringname   the name of the capturing substring
   ovector      the vector of matched substrings
+  stringcount  number of captured substrings 
 
 Returns:       the number of the first that is set,
                or the number of the last one if none are set,
@@ -258,13 +259,16 @@ Returns:       the number of the first that is set,
 
 #if defined COMPILE_PCRE8
 static int
-get_first_set(const pcre *code, const char *stringname, int *ovector)
+get_first_set(const pcre *code, const char *stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE16
 static int
-get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
+  int stringcount)
 #elif defined COMPILE_PCRE32
 static int
-get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
+get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
+  int stringcount)
 #endif
 {
 const REAL_PCRE *re = (const REAL_PCRE *)code;
@@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize;
 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
   {
   int n = GET2(entry, 0);
-  if (ovector[n*2] >= 0) return n;
+  if (n < stringcount && ovector[n*2] >= 0) return n;
   }
 return GET2(entry, 0);
 }
@@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
   PCRE_UCHAR32 *buffer, int size)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
@@ -457,7 +461,10 @@ pcre_uchar **stringlist;
 pcre_uchar *p;
 
 for (i = 0; i < double_count; i += 2)
-  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
+  {
+  size += sizeof(pcre_uchar *) + IN_UCHARS(1);
+  if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
+  } 
 
 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
@@ -473,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);
 
 for (i = 0; i < double_count; i += 2)
   {
-  int len = ovector[i+1] - ovector[i];
+  int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
   memcpy(p, subject + ovector[i], IN_UCHARS(len));
   *stringlist++ = p;
   p += len;
@@ -619,7 +626,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
   PCRE_SPTR32 *stringptr)
 #endif
 {
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
 if (n <= 0) return n;
 #if defined COMPILE_PCRE8
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
diff --git a/src/3rdparty/pcre/pcre_internal.h b/src/3rdparty/pcre/pcre_internal.h
index 544d9c0709..f7a5ee7aa6 100644
--- a/src/3rdparty/pcre/pcre_internal.h
+++ b/src/3rdparty/pcre/pcre_internal.h
@@ -2454,7 +2454,7 @@ typedef struct compile_data {
   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
   BOOL check_lookbehind;            /* Lookbehinds need later checking */
   BOOL dupnames;                    /* Duplicate names exist */
-  BOOL dupgroups;                   /* Duplicate groups exist: (?| found */ 
+  BOOL dupgroups;                   /* Duplicate groups exist: (?| found */
   BOOL iscondassert;                /* Next assert is a condition */
   int  nltype;                      /* Newline type */
   int  nllen;                       /* Newline string length */
diff --git a/src/3rdparty/pcre/pcre_jit_compile.c b/src/3rdparty/pcre/pcre_jit_compile.c
index 868d1d91bf..445de0cbef 100644
--- a/src/3rdparty/pcre/pcre_jit_compile.c
+++ b/src/3rdparty/pcre/pcre_jit_compile.c
@@ -4342,8 +4342,10 @@ switch(length)
   case 4:
   if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
       && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
+      && (ranges[1] & (ranges[2] - ranges[0])) == 0
       && is_powerof2(ranges[2] - ranges[0]))
     {
+    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
     if (ranges[2] + 1 != ranges[3])
       {
@@ -4931,9 +4933,10 @@ else if ((cc[-1] & XCL_MAP) != 0)
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
     {
 #ifdef COMPILE_PCRE8
-    SLJIT_ASSERT(common->utf);
+    jump = NULL;
+    if (common->utf)
 #endif
-    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
 
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
@@ -4942,7 +4945,10 @@ else if ((cc[-1] & XCL_MAP) != 0)
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
 
-    JUMPHERE(jump);
+#ifdef COMPILE_PCRE8
+    if (common->utf)
+#endif
+      JUMPHERE(jump);
     }
 
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
@@ -5250,7 +5256,7 @@ while (*cc != XCL_END)
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
 
       SET_CHAR_OFFSET(0);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
 
       SET_TYPE_OFFSET(ucp_Pc);
@@ -8477,8 +8483,7 @@ while (cc < ccend)
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
       }
     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
-    if (cc[1] > OP_ASSERTBACK_NOT)
-      count_match(common);
+    count_match(common);
     break;
 
     case OP_ONCE:
@@ -9660,7 +9665,7 @@ static SLJIT_INLINE void compile_recurse(compiler_common *common)
 DEFINE_COMPILER;
 pcre_uchar *cc = common->start + common->currententry->start;
 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
-pcre_uchar *ccend = bracketend(cc);
+pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
 BOOL needs_control_head;
 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
diff --git a/src/3rdparty/pcre/pcre_study.c b/src/3rdparty/pcre/pcre_study.c
index 932e9a7c4c..7fd0ba0b3d 100644
--- a/src/3rdparty/pcre/pcre_study.c
+++ b/src/3rdparty/pcre/pcre_study.c
@@ -71,7 +71,7 @@ Arguments:
   startcode       pointer to start of the whole pattern's code
   options         the compiling options
   recurses        chain of recurse_check to catch mutual recursion
-  countptr        pointer to call count (to catch over complexity) 
+  countptr        pointer to call count (to catch over complexity)
 
 Returns:   the minimum length
            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
diff --git a/src/3rdparty/pcre/pcre_xclass.c b/src/3rdparty/pcre/pcre_xclass.c
index c2b61f0f92..ef759a589a 100644
--- a/src/3rdparty/pcre/pcre_xclass.c
+++ b/src/3rdparty/pcre/pcre_xclass.c
@@ -246,7 +246,7 @@ while ((t = *data++) != XCL_END)
 
       case PT_PXPUNCT:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
-            (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
         return !negated;
       break;
 
diff --git a/src/3rdparty/pcre/sljit/sljitLir.h b/src/3rdparty/pcre/sljit/sljitLir.h
index f0969dac2e..2e2e9ac09c 100644
--- a/src/3rdparty/pcre/sljit/sljitLir.h
+++ b/src/3rdparty/pcre/sljit/sljitLir.h
@@ -869,34 +869,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
 	sljit_si src1, sljit_sw src1w,
 	sljit_si src2, sljit_sw src2w);
 
-/* The following function is a helper function for sljit_emit_op_custom.
-   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
-   SLJIT_S and SLJIT_SP registers.
-
-   Note: it returns with -1 for virtual registers (only on x86-32). */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
-
-/* The following function is a helper function for sljit_emit_op_custom.
-   It returns with the real machine register index of any SLJIT_FLOAT register.
-
-   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
-
-/* Any instruction can be inserted into the instruction stream by
-   sljit_emit_op_custom. It has a similar purpose as inline assembly.
-   The size parameter must match to the instruction size of the target
-   architecture:
-
-         x86: 0 < size <= 15. The instruction argument can be byte aligned.
-      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
-              if size == 4, the instruction argument must be 4 byte aligned.
-   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
-	void *instruction, sljit_si size);
-
 /* Returns with non-zero if fpu is available. */
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
@@ -1214,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct
 
 #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
 
+/* --------------------------------------------------------------------- */
+/*  CPU specific functions                                               */
+/* --------------------------------------------------------------------- */
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
+   SLJIT_S and SLJIT_SP registers.
+
+   Note: it returns with -1 for virtual registers (only on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index of any SLJIT_FLOAT register.
+
+   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
+/* Any instruction can be inserted into the instruction stream by
+   sljit_emit_op_custom. It has a similar purpose as inline assembly.
+   The size parameter must match to the instruction size of the target
+   architecture:
+
+         x86: 0 < size <= 15. The instruction argument can be byte aligned.
+      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+              if size == 4, the instruction argument must be 4 byte aligned.
+   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+	void *instruction, sljit_si size);
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+/* Returns with non-zero if sse2 is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
+
+/* Returns with non-zero if cmov instruction is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
+
+/* Emit a conditional mov instruction on x86 CPUs. This instruction
+   moves src to destination, if the condition is satisfied. Unlike
+   other arithmetic instructions, destination must be a register.
+   Before such instructions are emitted, cmov support should be
+   checked by sljit_x86_is_cmov_available function.
+    type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
+    dst_reg must be a valid register and it can be combined
+      with SLJIT_INT_OP to perform 32 bit arithmetic
+   Flags: I - (never set any flags)
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+	sljit_si type,
+	sljit_si dst_reg,
+	sljit_si src, sljit_sw srcw);
+
+#endif
+
 #endif /* _SLJIT_LIR_H_ */
diff --git a/src/3rdparty/pcre/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
index e148c34cd1..416c15afaf 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
@@ -2936,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
 {
 	*(sljit_sw*)addr = new_constant;
 }
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
+{
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+	if (cpu_has_sse2 == -1)
+		get_cpu_features();
+	return cpu_has_sse2;
+#else
+	return 1;
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
+{
+	if (cpu_has_cmov == -1)
+		get_cpu_features();
+	return cpu_has_cmov;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+	sljit_si type,
+	sljit_si dst_reg,
+	sljit_si src, sljit_sw srcw)
+{
+	sljit_ub* inst;
+
+	CHECK_ERROR();
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
+	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
+	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
+	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
+	FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
+			!(dst_reg & SLJIT_INT_OP) ? "" : ".i",
+			JUMP_PREFIX(type), jump_names[type & 0xff]);
+		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
+		fprintf(compiler->verbose, ", ");
+		sljit_verbose_param(compiler, src, srcw);
+		fprintf(compiler->verbose, "\n");
+	}
+#endif
+
+	ADJUST_LOCAL_OFFSET(src, srcw);
+	CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+	compiler->mode32 = dst_reg & SLJIT_INT_OP;
+#endif
+	dst_reg &= ~SLJIT_INT_OP;
+
+	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+		src = TMP_REG1;
+		srcw = 0;
+	}
+
+	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+	FAIL_IF(!inst);
+	*inst++ = GROUP_0F;
+	*inst = get_jump_code(type & 0xff) - 0x40;
+	return SLJIT_SUCCESS;
+}
author	Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>	2015-12-18 13:19:10 +0100
committer	Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>	2016-01-04 08:56:28 +0000
commit	3880f41e683f02b905c8cbc3c578c3f3a0a1eb2e (patch)
tree	8bb684c2847863f60e577f5f95e88e743730c946 /src
parent	b08d84d39e11b6abab4e941050c22f37ca79a83a (diff)