summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre')
-rw-r--r--src/3rdparty/pcre/AUTHORS6
-rw-r--r--src/3rdparty/pcre/LICENCE6
-rw-r--r--src/3rdparty/pcre/config.h1
-rw-r--r--src/3rdparty/pcre/patches/bug_1423_jit_condition_misoptimization_fix.diff15
-rw-r--r--src/3rdparty/pcre/patches/r1340_fix_jit_on_android.patch18
-rw-r--r--src/3rdparty/pcre/pcre.h50
-rw-r--r--src/3rdparty/pcre/pcre16_valid_utf16.c25
-rw-r--r--src/3rdparty/pcre/pcre_byte_order.c19
-rw-r--r--src/3rdparty/pcre/pcre_chartables.c2
-rw-r--r--src/3rdparty/pcre/pcre_compile.c3621
-rw-r--r--src/3rdparty/pcre/pcre_config.c4
-rw-r--r--src/3rdparty/pcre/pcre_dfa_exec.c230
-rw-r--r--src/3rdparty/pcre/pcre_exec.c959
-rw-r--r--src/3rdparty/pcre/pcre_fullinfo.c16
-rw-r--r--src/3rdparty/pcre/pcre_internal.h453
-rw-r--r--src/3rdparty/pcre/pcre_jit_compile.c3414
-rw-r--r--src/3rdparty/pcre/pcre_maketables.c27
-rw-r--r--src/3rdparty/pcre/pcre_string_utils.c8
-rw-r--r--src/3rdparty/pcre/pcre_study.c86
-rw-r--r--src/3rdparty/pcre/pcre_tables.c15
-rw-r--r--src/3rdparty/pcre/pcre_ucd.c753
-rw-r--r--src/3rdparty/pcre/pcre_valid_utf8.c15
-rw-r--r--src/3rdparty/pcre/pcre_xclass.c101
-rw-r--r--src/3rdparty/pcre/sljit/sljitConfig.h1
-rw-r--r--src/3rdparty/pcre/sljit/sljitConfigInternal.h19
-rw-r--r--src/3rdparty/pcre/sljit/sljitExecAllocator.c23
-rw-r--r--src/3rdparty/pcre/sljit/sljitLir.c60
-rw-r--r--src/3rdparty/pcre/sljit/sljitLir.h37
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeARM_Thumb2.c10
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeARM_v5.c12
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeMIPS_common.c10
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativePPC_common.c10
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeSPARC_common.c37
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeX86_32.c10
-rw-r--r--src/3rdparty/pcre/sljit/sljitNativeX86_common.c151
-rw-r--r--src/3rdparty/pcre/ucp.h5
36 files changed, 6598 insertions, 3631 deletions
diff --git a/src/3rdparty/pcre/AUTHORS b/src/3rdparty/pcre/AUTHORS
index ba4753d858..97d8c71dd6 100644
--- a/src/3rdparty/pcre/AUTHORS
+++ b/src/3rdparty/pcre/AUTHORS
@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2012 University of Cambridge
+Copyright (c) 1997-2013 University of Cambridge
All rights reserved
@@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2012 Zoltan Herczeg
+Copyright(c) 2010-2013 Zoltan Herczeg
All rights reserved.
@@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2012 Zoltan Herczeg
+Copyright(c) 2009-2013 Zoltan Herczeg
All rights reserved.
diff --git a/src/3rdparty/pcre/LICENCE b/src/3rdparty/pcre/LICENCE
index 5ce31a828d..3aff6a62c0 100644
--- a/src/3rdparty/pcre/LICENCE
+++ b/src/3rdparty/pcre/LICENCE
@@ -24,7 +24,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2012 University of Cambridge
+Copyright (c) 1997-2013 University of Cambridge
All rights reserved.
@@ -35,7 +35,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2012 Zoltan Herczeg
+Copyright(c) 2010-2013 Zoltan Herczeg
All rights reserved.
@@ -46,7 +46,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2012 Zoltan Herczeg
+Copyright(c) 2009-2013 Zoltan Herczeg
All rights reserved.
diff --git a/src/3rdparty/pcre/config.h b/src/3rdparty/pcre/config.h
index ed388fc9ba..d45d88abd1 100644
--- a/src/3rdparty/pcre/config.h
+++ b/src/3rdparty/pcre/config.h
@@ -8,6 +8,7 @@
#define MAX_NAME_COUNT 10000
#define MAX_NAME_SIZE 32
#define NEWLINE 10
+#define PARENS_NEST_LIMIT 250
#define POSIX_MALLOC_THRESHOLD 10
#define SUPPORT_UCP
diff --git a/src/3rdparty/pcre/patches/bug_1423_jit_condition_misoptimization_fix.diff b/src/3rdparty/pcre/patches/bug_1423_jit_condition_misoptimization_fix.diff
new file mode 100644
index 0000000000..4fd46d57a1
--- /dev/null
+++ b/src/3rdparty/pcre/patches/bug_1423_jit_condition_misoptimization_fix.diff
@@ -0,0 +1,15 @@
+Index: pcre_jit_compile.c
+===================================================================
+--- pcre_jit_compile.c (revision 1413)
++++ pcre_jit_compile.c (working copy)
+@@ -3546,7 +3546,9 @@
+ }
+ return TRUE;
+ }
+- if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
++ if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
++ && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
++ && is_powerof2(ranges[4] - ranges[2]))
+ {
+ if (readch)
+ read_char(common);
diff --git a/src/3rdparty/pcre/patches/r1340_fix_jit_on_android.patch b/src/3rdparty/pcre/patches/r1340_fix_jit_on_android.patch
deleted file mode 100644
index 41699d4882..0000000000
--- a/src/3rdparty/pcre/patches/r1340_fix_jit_on_android.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-Index: sljit/sljitConfigInternal.h
-===================================================================
---- sljit/sljitConfigInternal.h (revision 1339)
-+++ sljit/sljitConfigInternal.h (working copy)
-@@ -221,6 +221,13 @@
- #define SLJIT_CACHE_FLUSH(from, to) \
- sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
-
-+#elif defined __ANDROID__
-+
-+/* Android lacks __clear_cache; instead, cacheflush should be used. */
-+
-+#define SLJIT_CACHE_FLUSH(from, to) \
-+ cacheflush((long)(from), (long)(to), 0)
-+
- #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-
- /* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
diff --git a/src/3rdparty/pcre/pcre.h b/src/3rdparty/pcre/pcre.h
index a6aa4e934b..c85f36b6bc 100644
--- a/src/3rdparty/pcre/pcre.h
+++ b/src/3rdparty/pcre/pcre.h
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
-#define PCRE_MINOR 32
+#define PCRE_MINOR 34
#define PCRE_PRERELEASE
-#define PCRE_DATE 2012-11-30
+#define PCRE_DATE 2013-12-15
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@@ -96,11 +96,14 @@ extern "C" {
#endif
/* Public options. Some are compile-time only, some are run-time only, and some
-are both, so we keep them all distinct. However, almost all the bits in the
-options word are now used. In the long run, we may have to re-use some of the
-compile-time only bits for runtime options, or vice versa. Any of the
-compile-time options may be inspected during studying (and therefore JIT
-compiling).
+are both. Most of the compile-time options are saved with the compiled regex so
+that they can be inspected during studying (and therefore JIT compiling). Note
+that pcre_study() has its own set of options. Originally, all the options
+defined here used distinct bits. However, almost all the bits in a 32-bit word
+are now used, so in order to conserve them, option bits that were previously
+only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
+also be used for compile-time options that affect only compiling and are not
+relevant for studying or JIT compiling.
Some options for pcre_compile() change its behaviour but do not affect the
behaviour of the execution functions. Other options are passed through to the
@@ -142,8 +145,15 @@ with J. */
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
-#define PCRE_DFA_SHORTEST 0x00010000 /* D */
-#define PCRE_DFA_RESTART 0x00020000 /* D */
+
+/* This pair use the same bit. */
+#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
+#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
+
+/* This pair use the same bit. */
+#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
+#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
+
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
#define PCRE_DUPNAMES 0x00080000 /* C1 */
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
@@ -199,6 +209,7 @@ with J. */
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
+#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
@@ -224,7 +235,7 @@ with J. */
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
-#define PCRE_UTF8_ERR22 22
+#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
/* Specific error codes for UTF-16 validity checks */
@@ -232,13 +243,13 @@ with J. */
#define PCRE_UTF16_ERR1 1
#define PCRE_UTF16_ERR2 2
#define PCRE_UTF16_ERR3 3
-#define PCRE_UTF16_ERR4 4
+#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
/* Specific error codes for UTF-32 validity checks */
#define PCRE_UTF32_ERR0 0
#define PCRE_UTF32_ERR1 1
-#define PCRE_UTF32_ERR2 2
+#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
#define PCRE_UTF32_ERR3 3
/* Request types for pcre_fullinfo() */
@@ -263,10 +274,13 @@ with J. */
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
-#define PCRE_INFO_FIRSTCHARACTER 19
-#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
+#define PCRE_INFO_FIRSTCHARACTER 19
+#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
-#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_MATCHLIMIT 23
+#define PCRE_INFO_RECURSIONLIMIT 24
+#define PCRE_INFO_MATCH_EMPTY 25
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
@@ -284,6 +298,7 @@ compatible. */
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
#define PCRE_CONFIG_UTF32 12
+#define PCRE_CONFIG_PARENS_LIMIT 13
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
@@ -645,6 +660,9 @@ PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
pcre16_jit_callback, void *);
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
pcre32_jit_callback, void *);
+PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
+PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
+PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/3rdparty/pcre/pcre16_valid_utf16.c b/src/3rdparty/pcre/pcre16_valid_utf16.c
index 1486dfac09..1987f2710c 100644
--- a/src/3rdparty/pcre/pcre16_valid_utf16.c
+++ b/src/3rdparty/pcre/pcre16_valid_utf16.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -69,7 +69,7 @@ PCRE_UTF16_ERR0 No error
PCRE_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE_UTF16_ERR2 Invalid low surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
-PCRE_UTF16_ERR4 Non-character
+PCRE_UTF16_ERR4 Unused (was non-character)
Arguments:
string points to the string
@@ -100,19 +100,10 @@ for (p = string; length-- > 0; p++)
if ((c & 0xf800) != 0xd800)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
-
- /* Check for non-characters */
- if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
- {
- *erroroffset = p - string;
- return PCRE_UTF16_ERR4;
- }
}
else if ((c & 0x0400) == 0)
{
- /* High surrogate. */
-
- /* Must be a followed by a low surrogate. */
+ /* High surrogate. Must be a followed by a low surrogate. */
if (length == 0)
{
*erroroffset = p - string;
@@ -125,16 +116,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = p - string;
return PCRE_UTF16_ERR2;
}
- else
- {
- /* Valid surrogate, but check for non-characters */
- c = (((c & 0x3ffu) << 10) | (*p & 0x3ffu)) + 0x10000u;
- if ((c & 0xfffeu) == 0xfffeu)
- {
- *erroroffset = p - string;
- return PCRE_UTF16_ERR4;
- }
- }
}
else
{
diff --git a/src/3rdparty/pcre/pcre_byte_order.c b/src/3rdparty/pcre/pcre_byte_order.c
index 9f8eec87a5..02b8050327 100644
--- a/src/3rdparty/pcre/pcre_byte_order.c
+++ b/src/3rdparty/pcre/pcre_byte_order.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -126,14 +126,15 @@ if (re->magic_number == MAGIC_NUMBER)
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
-if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
-re->top_bracket = swap_uint16(re->top_bracket);
-re->top_backref = swap_uint16(re->top_backref);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
+
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
@@ -141,15 +142,15 @@ re->req_char = swap_uint16(re->req_char);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
#endif
+
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
-#ifdef COMPILE_PCRE32
-re->dummy1 = swap_uint16(re->dummy1);
-re->dummy2 = swap_uint16(re->dummy2);
-#endif
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
diff --git a/src/3rdparty/pcre/pcre_chartables.c b/src/3rdparty/pcre/pcre_chartables.c
index 55df49777d..89cc255a84 100644
--- a/src/3rdparty/pcre/pcre_chartables.c
+++ b/src/3rdparty/pcre/pcre_chartables.c
@@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
- 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
+ 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
diff --git a/src/3rdparty/pcre/pcre_compile.c b/src/3rdparty/pcre/pcre_compile.c
index b3b64fb7a0..9708b93923 100644
--- a/src/3rdparty/pcre/pcre_compile.c
+++ b/src/3rdparty/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -115,6 +115,13 @@ kicks in at the same number of forward references in all cases. */
#define COMPILE_WORK_SIZE (2048*LINK_SIZE)
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
+/* This value determines the size of the initial vector that is used for
+remembering named groups during the pre-compile. It is allocated on the stack,
+but if it is too small, it is expanded using malloc(), in a similar way to the
+workspace. The value is the number of slots in the list. */
+
+#define NAMED_GROUP_LIST_SIZE 20
+
/* The overrun tests check for a slightly smaller size so that they detect the
overrun before it actually does run off the end of the data block. */
@@ -253,11 +260,25 @@ static const verbitem verbs[] = {
static const int verbcount = sizeof(verbs)/sizeof(verbitem);
+/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in
+another regex library. */
+
+static const pcre_uchar sub_start_of_word[] = {
+ CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
+ CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' };
+
+static const pcre_uchar sub_end_of_word[] = {
+ CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
+ CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w,
+ CHAR_RIGHT_PARENTHESIS, '\0' };
+
+
/* Tables of names of POSIX character classes and their lengths. The names are
now all in a single string, to reduce the number of relocations when a shared
library is dynamically loaded. The list of lengths is terminated by a zero
length entry. The first three must be alpha, lower, upper, as this is assumed
-for handling case independence. */
+for handling case independence. The indices for graph, print, and punct are
+needed, so identify them. */
static const char posix_names[] =
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
@@ -268,6 +289,11 @@ static const char posix_names[] =
static const pcre_uint8 posix_name_lengths[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
+#define PC_GRAPH 8
+#define PC_PRINT 9
+#define PC_PUNCT 10
+
+
/* Table of class bit maps for each POSIX class. Each class is formed from a
base map, with an optional addition or removal of another map. Then, for some
classes, there is some additional tweaking: for [:blank:] the vertical space
@@ -295,9 +321,8 @@ static const int posix_class_maps[] = {
cbit_xdigit,-1, 0 /* xdigit */
};
-/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class
-substitutes must be in the order of the names, defined above, and there are
-both positive and negative cases. NULL means no substitute. */
+/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by
+Unicode property escapes. */
#ifdef SUPPORT_UCP
static const pcre_uchar string_PNd[] = {
@@ -322,12 +347,18 @@ static const pcre_uchar string_pXwd[] = {
static const pcre_uchar *substitutes[] = {
string_PNd, /* \D */
string_pNd, /* \d */
- string_PXsp, /* \S */ /* NOTE: Xsp is Perl space */
- string_pXsp, /* \s */
+ string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */
+ string_pXsp, /* \s */ /* space and POSIX space are the same. */
string_PXwd, /* \W */
string_pXwd /* \w */
};
+/* The POSIX class substitutes must be in the order of the POSIX class names,
+defined above, and there are both positive and negative cases. NULL means no
+general substitute of a Unicode property escape (\p or \P). However, for some
+POSIX classes (e.g. graph, print, punct) a special property code is compiled
+directly. */
+
static const pcre_uchar string_pL[] = {
CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
@@ -375,8 +406,8 @@ static const pcre_uchar *posix_substitutes[] = {
NULL, /* graph */
NULL, /* print */
NULL, /* punct */
- string_pXps, /* space */ /* NOTE: Xps is POSIX space */
- string_pXwd, /* word */
+ string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */
+ string_pXwd, /* word */ /* Perl and POSIX space are the same */
NULL, /* xdigit */
/* Negated cases */
string_PL, /* ^alpha */
@@ -390,8 +421,8 @@ static const pcre_uchar *posix_substitutes[] = {
NULL, /* ^graph */
NULL, /* ^print */
NULL, /* ^punct */
- string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */
- string_PXwd, /* ^word */
+ string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */
+ string_PXwd, /* ^word */ /* Perl and POSIX space are the same */
NULL /* ^xdigit */
};
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
@@ -455,7 +486,7 @@ static const char error_texts[] =
"POSIX collating elements are not supported\0"
"this version of PCRE is compiled without UTF support\0"
"spare error\0" /** DEAD **/
- "character value in \\x{...} sequence is too large\0"
+ "character value in \\x{} or \\o{} is too large\0"
/* 35 */
"invalid condition (?(0)\0"
"\\C not allowed in lookbehind assertion\0"
@@ -487,7 +518,7 @@ static const char error_texts[] =
"a numbered reference must not be zero\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
- "(*VERB) not recognized\0"
+ "(*VERB) not recognized or malformed\0"
"number is too big\0"
"subpattern name expected\0"
"digit expected after (?+\0"
@@ -508,6 +539,14 @@ static const char error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character value in \\u.... sequence is too large\0"
"invalid UTF-32 string\0"
+ "setting UTF is disabled by the application\0"
+ "non-hex character in \\x{} (closing brace missing?)\0"
+ /* 80 */
+ "non-octal character in \\o{} (closing brace missing?)\0"
+ "missing opening brace after \\o\0"
+ "parentheses are too deeply nested\0"
+ "invalid range in character class\0"
+ "group name must start with a non-digit\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -647,6 +686,183 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
#endif
+/* This table is used to check whether auto-possessification is possible
+between adjacent character-type opcodes. The left-hand (repeated) opcode is
+used to select the row, and the right-hand opcode is use to select the column.
+A value of 1 means that auto-possessification is OK. For example, the second
+value in the first row means that \D+\d can be turned into \D++\d.
+
+The Unicode property types (\P and \p) have to be present to fill out the table
+because of what their opcode values are, but the table values should always be
+zero because property types are handled separately in the code. The last four
+columns apply to items that cannot be repeated, so there is no need to have
+rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
+*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
+
+#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
+#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
+
+static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = {
+/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
+ { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
+ { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
+ { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
+ { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
+ { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
+ { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
+ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
+ { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
+ { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
+};
+
+
+/* This table is used to check whether auto-possessification is possible
+between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
+left-hand (repeated) opcode is used to select the row, and the right-hand
+opcode is used to select the column. The values are as follows:
+
+ 0 Always return FALSE (never auto-possessify)
+ 1 Character groups are distinct (possessify if both are OP_PROP)
+ 2 Check character categories in the same group (general or particular)
+ 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
+
+ 4 Check left general category vs right particular category
+ 5 Check right general category vs left particular category
+
+ 6 Left alphanum vs right general category
+ 7 Left space vs right general category
+ 8 Left word vs right general category
+
+ 9 Right alphanum vs left general category
+ 10 Right space vs left general category
+ 11 Right word vs left general category
+
+ 12 Left alphanum vs right particular category
+ 13 Left space vs right particular category
+ 14 Left word vs right particular category
+
+ 15 Right alphanum vs left particular category
+ 16 Right space vs left particular category
+ 17 Right word vs left particular category
+*/
+
+static const pcre_uint8 propposstab[PT_TABSIZE][PT_TABSIZE] = {
+/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
+ { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
+ { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
+ { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
+ { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
+ { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
+ { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
+ { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
+ { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
+};
+
+/* This table is used to check whether auto-possessification is possible
+between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
+specifies a general category and the other specifies a particular category. The
+row is selected by the general category and the column by the particular
+category. The value is 1 if the particular category is not part of the general
+category. */
+
+static const pcre_uint8 catposstab[7][30] = {
+/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
+ { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */
+ { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */
+};
+
+/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
+a general or particular category. The properties in each row are those
+that apply to the character set in question. Duplication means that a little
+unnecessary work is done when checking, but this keeps things much simpler
+because they can all use the same code. For more details see the comment where
+this table is used.
+
+Note: SPACE and PXSPACE used to be different because Perl excluded VT from
+"space", but from Perl 5.18 it's included, so both categories are treated the
+same here. */
+
+static const pcre_uint8 posspropstab[3][4] = {
+ { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
+ { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
+ { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
+};
+
+/* This table is used when converting repeating opcodes into possessified
+versions as a result of an explicit possessive quantifier such as ++. A zero
+value means there is no possessified version - in those cases the item in
+question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
+because all relevant opcodes are less than that. */
+
+static const pcre_uint8 opcode_possessify[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */
+
+ 0, /* NOTI */
+ OP_POSSTAR, 0, /* STAR, MINSTAR */
+ OP_POSPLUS, 0, /* PLUS, MINPLUS */
+ OP_POSQUERY, 0, /* QUERY, MINQUERY */
+ OP_POSUPTO, 0, /* UPTO, MINUPTO */
+ 0, /* EXACT */
+ 0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */
+
+ OP_POSSTARI, 0, /* STARI, MINSTARI */
+ OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */
+ OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */
+ OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */
+ 0, /* EXACTI */
+ 0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */
+
+ OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */
+ OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */
+ OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */
+ OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */
+ 0, /* NOTEXACT */
+ 0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
+
+ OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */
+ OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */
+ OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */
+ OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */
+ 0, /* NOTEXACTI */
+ 0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
+
+ OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */
+ OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */
+ OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */
+ OP_TYPEPOSUPTO, 0, /* TYPEUPTO, TYPEMINUPTO */
+ 0, /* TYPEEXACT */
+ 0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
+
+ OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */
+ OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */
+ OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */
+ OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */
+ 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */
+
+ 0, 0, 0, /* CLASS, NCLASS, XCLASS */
+ 0, 0, /* REF, REFI */
+ 0, 0, /* DNREF, DNREFI */
+ 0, 0 /* RECURSE, CALLOUT */
+};
+
+
/*************************************************
* Find an error text *
@@ -674,6 +890,7 @@ return s;
}
+
/*************************************************
* Expand the workspace *
*************************************************/
@@ -751,16 +968,15 @@ return (*p == CHAR_RIGHT_CURLY_BRACKET);
*************************************************/
/* This function is called when a \ has been encountered. It either returns a
-positive value for a simple escape such as \n, or 0 for a data character
-which will be placed in chptr. A backreference to group n is returned as
-negative n. When UTF-8 is enabled, a positive value greater than 255 may
-be returned in chptr.
-On entry,ptr is pointing at the \. On exit, it is on the final character of the
-escape sequence.
+positive value for a simple escape such as \n, or 0 for a data character which
+will be placed in chptr. A backreference to group n is returned as negative n.
+When UTF-8 is enabled, a positive value greater than 255 may be returned in
+chptr. On entry, ptr is pointing at the \. On exit, it is on the final
+character of the escape sequence.
Arguments:
ptrptr points to the pattern position pointer
- chptr points to the data character
+ chptr points to a returned data character
errorcodeptr points to the errorcode variable
bracount number of previous extracting brackets
options the options bits
@@ -797,7 +1013,8 @@ Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
+else if ((i = escapes[c - CHAR_0]) != 0)
+ { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
#else /* EBCDIC coding */
/* Not alphanumeric */
@@ -847,11 +1064,11 @@ else
}
#if defined COMPILE_PCRE8
- if (c > (utf ? 0x10ffff : 0xff))
+ if (c > (utf ? 0x10ffffU : 0xffU))
#elif defined COMPILE_PCRE16
- if (c > (utf ? 0x10ffff : 0xffff))
+ if (c > (utf ? 0x10ffffU : 0xffffU))
#elif defined COMPILE_PCRE32
- if (utf && c > 0x10ffff)
+ if (utf && c > 0x10ffffU)
#endif
{
*errorcodeptr = ERR76;
@@ -963,16 +1180,20 @@ else
break;
/* The handling of escape sequences consisting of a string of digits
- starting with one that is not zero is not straightforward. By experiment,
- the way Perl works seems to be as follows:
+ starting with one that is not zero is not straightforward. Perl has changed
+ over the years. Nowadays \g{} for backreferences and \o{} for octal are
+ recommended to avoid the ambiguities in the old syntax.
Outside a character class, the digits are read as a decimal number. If the
- number is less than 10, or if there are that many previous extracting
- left brackets, then it is a back reference. Otherwise, up to three octal
- digits are read to form an escaped byte. Thus \123 is likely to be octal
- 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
- value is greater than 377, the least significant 8 bits are taken. Inside a
- character class, \ followed by a digit is always an octal number. */
+ number is less than 8 (used to be 10), or if there are that many previous
+ extracting left brackets, then it is a back reference. Otherwise, up to
+ three octal digits are read to form an escaped byte. Thus \123 is likely to
+ be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
+ the octal value is greater than 377, the least significant 8 bits are
+ taken. \8 and \9 are treated as the literal characters 8 and 9.
+
+ Inside a character class, \ followed by a digit is always either a literal
+ 8 or 9 or an octal number. */
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
@@ -999,7 +1220,7 @@ else
*errorcodeptr = ERR61;
break;
}
- if (s < 10 || s <= bracount)
+ if (s < 8 || s <= bracount) /* Check for back reference */
{
escape = -s;
break;
@@ -1007,16 +1228,14 @@ else
ptr = oldptr; /* Put the pointer back and fall through */
}
- /* Handle an octal number following \. If the first digit is 8 or 9, Perl
- generates a binary zero byte and treats the digit as a following literal.
- Thus we have to pull back the pointer by one. */
+ /* Handle a digit following \ when the number is not a back reference. If
+ the first digit is 8 or 9, Perl used to generate a binary zero byte and
+ then treat the digit as a following literal. At least by Perl 5.18 this
+ changed so as not to insert the binary zero. */
- if ((c = *ptr) >= CHAR_8)
- {
- ptr--;
- c = 0;
- break;
- }
+ if ((c = *ptr) >= CHAR_8) break;
+
+ /* Fall through with a digit less than 8 */
/* \0 always starts an octal number, but we may drop through to here with a
larger first octal digit. The original code used just to take the least
@@ -1033,15 +1252,50 @@ else
#endif
break;
- /* \x is complicated. \x{ddd} is a character number which can be greater
- than 0xff in utf or non-8bit mode, but only if the ddd are hex digits.
- If not, { is treated as a data character. */
+ /* \o is a relatively new Perl feature, supporting a more general way of
+ specifying character codes in octal. The only supported form is \o{ddd}. */
+
+ case CHAR_o:
+ if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
+ {
+ ptr += 2;
+ c = 0;
+ overflow = FALSE;
+ while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
+ {
+ register pcre_uint32 cc = *ptr++;
+ if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
+#ifdef COMPILE_PCRE32
+ if (c >= 0x20000000l) { overflow = TRUE; break; }
+#endif
+ c = (c << 3) + cc - CHAR_0 ;
+#if defined COMPILE_PCRE8
+ if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
+#elif defined COMPILE_PCRE16
+ if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
+#elif defined COMPILE_PCRE32
+ if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
+#endif
+ }
+ if (overflow)
+ {
+ while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
+ *errorcodeptr = ERR34;
+ }
+ else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
+ {
+ if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+ }
+ else *errorcodeptr = ERR80;
+ }
+ break;
+
+ /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
+ numbers. Otherwise it is a lowercase x letter. */
case CHAR_x:
if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
{
- /* In JavaScript, \x must be followed by two hexadecimal numbers.
- Otherwise it is a lowercase x letter. */
if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
&& MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
{
@@ -1058,73 +1312,86 @@ else
#endif
}
}
- break;
- }
+ } /* End JavaScript handling */
- if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
- {
- const pcre_uchar *pt = ptr + 2;
+ /* Handle \x in Perl's style. \x{ddd} is a character number which can be
+ greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
+ digits. If not, { used to be treated as a data character. However, Perl
+ seems to read hex digits up to the first non-such, and ignore the rest, so
+ that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
+ now gives an error. */
- c = 0;
- overflow = FALSE;
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
+ else
+ {
+ if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
- register pcre_uint32 cc = *pt++;
- if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
+ ptr += 2;
+ c = 0;
+ overflow = FALSE;
+ while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
+ {
+ register pcre_uint32 cc = *ptr++;
+ if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
#ifdef COMPILE_PCRE32
- if (c >= 0x10000000l) { overflow = TRUE; break; }
+ if (c >= 0x10000000l) { overflow = TRUE; break; }
#endif
#ifndef EBCDIC /* ASCII/UTF-8 coding */
- if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
- c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
#else /* EBCDIC coding */
- if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
- c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
#if defined COMPILE_PCRE8
- if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }
+ if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE16
- if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }
+ if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE32
- if (utf && c > 0x10ffff) { overflow = TRUE; break; }
+ if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
#endif
- }
+ }
- if (overflow)
- {
- while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
- *errorcodeptr = ERR34;
- }
+ if (overflow)
+ {
+ while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
+ *errorcodeptr = ERR34;
+ }
- if (*pt == CHAR_RIGHT_CURLY_BRACKET)
- {
- if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
- ptr = pt;
- break;
- }
+ else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
+ {
+ if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+ }
- /* If the sequence of hex digits does not end with '}', then we don't
- recognize this construct; fall through to the normal \x handling. */
- }
+ /* If the sequence of hex digits does not end with '}', give an error.
+ We used just to recognize this construct and fall through to the normal
+ \x handling, but nowadays Perl gives an error, which seems much more
+ sensible, so we do too. */
- /* Read just a single-byte hex-defined char */
+ else *errorcodeptr = ERR79;
+ } /* End of \x{} processing */
- c = 0;
- while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
- {
- pcre_uint32 cc; /* Some compilers don't like */
- cc = *(++ptr); /* ++ in initializers */
+ /* Read a single-byte hex-defined char (up to two hex digits after \x) */
+
+ else
+ {
+ c = 0;
+ while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
+ {
+ pcre_uint32 cc; /* Some compilers don't like */
+ cc = *(++ptr); /* ++ in initializers */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
- if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
- c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
#else /* EBCDIC coding */
- if (cc <= CHAR_z) cc += 64; /* Convert to upper case */
- c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+ if (cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
- }
+ }
+ } /* End of \xdd handling */
+ } /* End of Perl-style \x handling */
break;
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
@@ -1190,6 +1457,8 @@ if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
return escape;
}
+
+
#ifdef SUPPORT_UCP
/*************************************************
* Handle \P and \p *
@@ -1287,7 +1556,6 @@ return FALSE;
-
/*************************************************
* Read repeat counts *
*************************************************/
@@ -1356,302 +1624,6 @@ return p;
/*************************************************
-* Subroutine for finding forward reference *
-*************************************************/
-
-/* This recursive function is called only from find_parens() below. The
-top-level call starts at the beginning of the pattern. All other calls must
-start at a parenthesis. It scans along a pattern's text looking for capturing
-subpatterns, and counting them. If it finds a named pattern that matches the
-name it is given, it returns its number. Alternatively, if the name is NULL, it
-returns when it reaches a given numbered subpattern. Recursion is used to keep
-track of subpatterns that reset the capturing group numbers - the (?| feature.
-
-This function was originally called only from the second pass, in which we know
-that if (?< or (?' or (?P< is encountered, the name will be correctly
-terminated because that is checked in the first pass. There is now one call to
-this function in the first pass, to check for a recursive back reference by
-name (so that we can make the whole group atomic). In this case, we need check
-only up to the current position in the pattern, and that is still OK because
-and previous occurrences will have been checked. To make this work, the test
-for "end of pattern" is a check against cd->end_pattern in the main loop,
-instead of looking for a binary zero. This means that the special first-pass
-call can adjust cd->end_pattern temporarily. (Checks for binary zero while
-processing items within the loop are OK, because afterwards the main loop will
-terminate.)
-
-Arguments:
- ptrptr address of the current character pointer (updated)
- cd compile background data
- name name to seek, or NULL if seeking a numbered subpattern
- lorn name length, or subpattern number if name is NULL
- xmode TRUE if we are in /x mode
- utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
- count pointer to the current capturing subpattern number (updated)
-
-Returns: the number of the named subpattern, or -1 if not found
-*/
-
-static int
-find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
- BOOL xmode, BOOL utf, int *count)
-{
-pcre_uchar *ptr = *ptrptr;
-int start_count = *count;
-int hwm_count = start_count;
-BOOL dup_parens = FALSE;
-
-/* If the first character is a parenthesis, check on the type of group we are
-dealing with. The very first call may not start with a parenthesis. */
-
-if (ptr[0] == CHAR_LEFT_PARENTHESIS)
- {
- /* Handle specials such as (*SKIP) or (*UTF8) etc. */
-
- if (ptr[1] == CHAR_ASTERISK) ptr += 2;
-
- /* Handle a normal, unnamed capturing parenthesis. */
-
- else if (ptr[1] != CHAR_QUESTION_MARK)
- {
- *count += 1;
- if (name == NULL && *count == lorn) return *count;
- ptr++;
- }
-
- /* All cases now have (? at the start. Remember when we are in a group
- where the parenthesis numbers are duplicated. */
-
- else if (ptr[2] == CHAR_VERTICAL_LINE)
- {
- ptr += 3;
- dup_parens = TRUE;
- }
-
- /* Handle comments; all characters are allowed until a ket is reached. */
-
- else if (ptr[2] == CHAR_NUMBER_SIGN)
- {
- for (ptr += 3; *ptr != CHAR_NULL; ptr++)
- if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
- goto FAIL_EXIT;
- }
-
- /* Handle a condition. If it is an assertion, just carry on so that it
- is processed as normal. If not, skip to the closing parenthesis of the
- condition (there can't be any nested parens). */
-
- else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
- {
- ptr += 2;
- if (ptr[1] != CHAR_QUESTION_MARK)
- {
- while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- if (*ptr != CHAR_NULL) ptr++;
- }
- }
-
- /* Start with (? but not a condition. */
-
- else
- {
- ptr += 2;
- if (*ptr == CHAR_P) ptr++; /* Allow optional P */
-
- /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
-
- if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
- ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
- {
- pcre_uchar term;
- const pcre_uchar *thisname;
- *count += 1;
- if (name == NULL && *count == lorn) return *count;
- term = *ptr++;
- if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
- thisname = ptr;
- while (*ptr != term) ptr++;
- if (name != NULL && lorn == (int)(ptr - thisname) &&
- STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)
- return *count;
- term++;
- }
- }
- }
-
-/* Past any initial parenthesis handling, scan for parentheses or vertical
-bars. Stop if we get to cd->end_pattern. Note that this is important for the
-first-pass call when this value is temporarily adjusted to stop at the current
-position. So DO NOT change this to a test for binary zero. */
-
-for (; ptr < cd->end_pattern; ptr++)
- {
- /* Skip over backslashed characters and also entire \Q...\E */
-
- if (*ptr == CHAR_BACKSLASH)
- {
- if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
- if (*ptr == CHAR_Q) for (;;)
- {
- while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
- if (*ptr == CHAR_NULL) goto FAIL_EXIT;
- if (*(++ptr) == CHAR_E) break;
- }
- continue;
- }
-
- /* Skip over character classes; this logic must be similar to the way they
- are handled for real. If the first character is '^', skip it. Also, if the
- first few characters (either before or after ^) are \Q\E or \E we skip them
- too. This makes for compatibility with Perl. Note the use of STR macros to
- encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
-
- if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
- {
- BOOL negate_class = FALSE;
- for (;;)
- {
- if (ptr[1] == CHAR_BACKSLASH)
- {
- if (ptr[2] == CHAR_E)
- ptr+= 2;
- else if (STRNCMP_UC_C8(ptr + 2,
- STR_Q STR_BACKSLASH STR_E, 3) == 0)
- ptr += 4;
- else
- break;
- }
- else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
- {
- negate_class = TRUE;
- ptr++;
- }
- else break;
- }
-
- /* If the next character is ']', it is a data character that must be
- skipped, except in JavaScript compatibility mode. */
-
- if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
- (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
- ptr++;
-
- while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
- {
- if (*ptr == CHAR_NULL) return -1;
- if (*ptr == CHAR_BACKSLASH)
- {
- if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;
- if (*ptr == CHAR_Q) for (;;)
- {
- while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};
- if (*ptr == CHAR_NULL) goto FAIL_EXIT;
- if (*(++ptr) == CHAR_E) break;
- }
- continue;
- }
- }
- continue;
- }
-
- /* Skip comments in /x mode */
-
- if (xmode && *ptr == CHAR_NUMBER_SIGN)
- {
- ptr++;
- while (*ptr != CHAR_NULL)
- {
- if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
- ptr++;
-#ifdef SUPPORT_UTF
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
- if (*ptr == CHAR_NULL) goto FAIL_EXIT;
- continue;
- }
-
- /* Check for the special metacharacters */
-
- if (*ptr == CHAR_LEFT_PARENTHESIS)
- {
- int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);
- if (rc > 0) return rc;
- if (*ptr == CHAR_NULL) goto FAIL_EXIT;
- }
-
- else if (*ptr == CHAR_RIGHT_PARENTHESIS)
- {
- if (dup_parens && *count < hwm_count) *count = hwm_count;
- goto FAIL_EXIT;
- }
-
- else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
- {
- if (*count > hwm_count) hwm_count = *count;
- *count = start_count;
- }
- }
-
-FAIL_EXIT:
-*ptrptr = ptr;
-return -1;
-}
-
-
-
-
-/*************************************************
-* Find forward referenced subpattern *
-*************************************************/
-
-/* This function scans along a pattern's text looking for capturing
-subpatterns, and counting them. If it finds a named pattern that matches the
-name it is given, it returns its number. Alternatively, if the name is NULL, it
-returns when it reaches a given numbered subpattern. This is used for forward
-references to subpatterns. We used to be able to start this scan from the
-current compiling point, using the current count value from cd->bracount, and
-do it all in a single loop, but the addition of the possibility of duplicate
-subpattern numbers means that we have to scan from the very start, in order to
-take account of such duplicates, and to use a recursive function to keep track
-of the different types of group.
-
-Arguments:
- cd compile background data
- name name to seek, or NULL if seeking a numbered subpattern
- lorn name length, or subpattern number if name is NULL
- xmode TRUE if we are in /x mode
- utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
-
-Returns: the number of the found subpattern, or -1 if not found
-*/
-
-static int
-find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
- BOOL utf)
-{
-pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
-int count = 0;
-int rc;
-
-/* If the pattern does not start with an opening parenthesis, the first call
-to find_parens_sub() will scan right to the end (if necessary). However, if it
-does start with a parenthesis, find_parens_sub() will return when it hits the
-matching closing parens. That is why we have to have a loop. */
-
-for (;;)
- {
- rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);
- if (rc > 0 || *ptr++ == CHAR_NULL) break;
- }
-
-return rc;
-}
-
-
-
-
-/*************************************************
* Find first significant op code *
*************************************************/
@@ -1690,9 +1662,9 @@ for (;;)
case OP_CALLOUT:
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
code += PRIV(OP_lengths)[*code];
break;
@@ -1706,7 +1678,6 @@ for (;;)
-
/*************************************************
* Find the fixed length of a branch *
*************************************************/
@@ -1830,13 +1801,13 @@ for (;;)
case OP_COMMIT:
case OP_CREF:
case OP_DEF:
+ case OP_DNCREF:
+ case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_EOD:
case OP_EODN:
case OP_FAIL:
- case OP_NCREF:
- case OP_NRREF:
case OP_NOT_WORD_BOUNDARY:
case OP_PRUNE:
case OP_REVERSE:
@@ -1931,16 +1902,20 @@ for (;;)
switch (*cc)
{
- case OP_CRPLUS:
- case OP_CRMINPLUS:
case OP_CRSTAR:
case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
return -1;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
branchlength += (int)GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
@@ -2009,6 +1984,8 @@ for (;;)
case OP_QUERYI:
case OP_REF:
case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCBRA:
@@ -2045,7 +2022,6 @@ for (;;)
-
/*************************************************
* Scan compiled regex for specific bracket *
*************************************************/
@@ -2129,9 +2105,6 @@ for (;;)
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2249,9 +2222,6 @@ for (;;)
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2353,15 +2323,23 @@ Arguments:
endcode points to where to stop
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
+ recurses chain of recurse_check to catch mutual recursion
Returns: TRUE if what is matched could be empty
*/
+typedef struct recurse_check {
+ struct recurse_check *prev;
+ const pcre_uchar *group;
+} recurse_check;
+
static BOOL
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
- BOOL utf, compile_data *cd)
+ BOOL utf, compile_data *cd, recurse_check *recurses)
{
register pcre_uchar c;
+recurse_check this_recurse;
+
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
code < endcode;
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
@@ -2389,25 +2367,50 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (c == OP_RECURSE)
{
- const pcre_uchar *scode;
+ const pcre_uchar *scode = cd->start_code + GET(code, 1);
BOOL empty_branch;
- /* Test for forward reference */
+ /* Test for forward reference or uncompleted reference. This is disabled
+ when called to scan a completed pattern by setting cd->start_workspace to
+ NULL. */
- for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
- if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (cd->start_workspace != NULL)
+ {
+ const pcre_uchar *tcode;
+ for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
+ if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ }
- /* Not a forward reference, test for completed backward reference */
+ /* If we are scanning a completed pattern, there are no forward references
+ and all groups are complete. We need to detect whether this is a recursive
+ call, as otherwise there will be an infinite loop. If it is a recursion,
+ just skip over it. Simple recursions are easily detected. For mutual
+ recursions we keep a chain on the stack. */
- empty_branch = FALSE;
- scode = cd->start_code + GET(code, 1);
- if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ else
+ {
+ recurse_check *r = recurses;
+ const pcre_uchar *endgroup = scode;
- /* Completed backwards reference */
+ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
+ if (code >= scode && code <= endgroup) continue; /* Simple recursion */
+
+ for (r = recurses; r != NULL; r = r->prev)
+ if (r->group == scode) break;
+ if (r != NULL) continue; /* Mutual recursion */
+ }
+
+ /* Completed reference; scan the referenced group, remembering it on the
+ stack chain to detect mutual recursions. */
+
+ empty_branch = FALSE;
+ this_recurse.prev = recurses;
+ this_recurse.group = scode;
do
{
- if (could_be_empty_branch(scode, endcode, utf, cd))
+ if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
{
empty_branch = TRUE;
break;
@@ -2463,7 +2466,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
empty_branch = FALSE;
do
{
- if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
empty_branch = TRUE;
code += GET(code, 1);
}
@@ -2505,15 +2508,19 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
break;
default: /* Non-repeat => class must match */
case OP_CRPLUS: /* These repeats aren't empty */
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
return FALSE;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */
break;
}
@@ -2521,34 +2528,57 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
/* Opcodes that must match a character */
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+
case OP_PROP:
case OP_NOTPROP:
+ case OP_ANYNL:
+
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
case OP_EXTUNI:
+
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
+
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
+
case OP_PLUS:
+ case OP_PLUSI:
case OP_MINPLUS:
- case OP_POSPLUS:
- case OP_EXACT:
+ case OP_MINPLUSI:
+
case OP_NOTPLUS:
+ case OP_NOTPLUSI:
case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+
+ case OP_EXACT:
+ case OP_EXACTI:
case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEEXACT:
+
return FALSE;
/* These are going to continue, as they may be empty, but we have to
@@ -2582,30 +2612,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
return TRUE;
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
- MINUPTO, and POSUPTO may be followed by a multibyte character */
+ MINUPTO, and POSUPTO and their caseless and negative versions may be
+ followed by a multibyte character. */
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
case OP_STAR:
case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+
case OP_MINSTAR:
case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+
case OP_POSSTAR:
case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+
case OP_QUERY:
case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+
case OP_MINQUERY:
case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+
case OP_POSQUERY:
case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
break;
case OP_UPTO:
case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+
case OP_MINUPTO:
case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+
case OP_POSUPTO:
case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
break;
#endif
@@ -2616,9 +2674,6 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2662,7 +2717,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
return FALSE;
bcptr = bcptr->outer;
}
@@ -2672,6 +2727,1072 @@ return TRUE;
/*************************************************
+* Base opcode of repeated opcodes *
+*************************************************/
+
+/* Returns the base opcode for repeated single character type opcodes. If the
+opcode is not a repeated character type, it returns with the original value.
+
+Arguments: c opcode
+Returns: base opcode for the type
+*/
+
+static pcre_uchar
+get_repeat_base(pcre_uchar c)
+{
+return (c > OP_TYPEPOSUPTO)? c :
+ (c >= OP_TYPESTAR)? OP_TYPESTAR :
+ (c >= OP_NOTSTARI)? OP_NOTSTARI :
+ (c >= OP_NOTSTAR)? OP_NOTSTAR :
+ (c >= OP_STARI)? OP_STARI :
+ OP_STAR;
+}
+
+
+
+#ifdef SUPPORT_UCP
+/*************************************************
+* Check a character and a property *
+*************************************************/
+
+/* This function is called by check_auto_possessive() when a property item
+is adjacent to a fixed character.
+
+Arguments:
+ c the character
+ ptype the property type
+ pdata the data for the type
+ negated TRUE if it's a negated property (\P or \p{^)
+
+Returns: TRUE if auto-possessifying is OK
+*/
+
+static BOOL
+check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata,
+ BOOL negated)
+{
+const pcre_uint32 *p;
+const ucd_record *prop = GET_UCD(c);
+
+switch(ptype)
+ {
+ case PT_LAMP:
+ return (prop->chartype == ucp_Lu ||
+ prop->chartype == ucp_Ll ||
+ prop->chartype == ucp_Lt) == negated;
+
+ case PT_GC:
+ return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
+
+ case PT_PC:
+ return (pdata == prop->chartype) == negated;
+
+ case PT_SC:
+ return (pdata == prop->script) == negated;
+
+ /* These are specials */
+
+ case PT_ALNUM:
+ return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
+
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
+ means that Perl space and POSIX space are now identical. PCRE was changed
+ at release 8.34. */
+
+ case PT_SPACE: /* Perl space */
+ case PT_PXSPACE: /* POSIX space */
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ return negated;
+
+ default:
+ return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
+ }
+ break; /* Control never reaches here */
+
+ case PT_WORD:
+ return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE) == negated;
+
+ case PT_CLIST:
+ p = PRIV(ucd_caseless_sets) + prop->caseset;
+ for (;;)
+ {
+ if (c < *p) return !negated;
+ if (c == *p++) return negated;
+ }
+ break; /* Control never reaches here */
+ }
+
+return FALSE;
+}
+#endif /* SUPPORT_UCP */
+
+
+
+/*************************************************
+* Fill the character property list *
+*************************************************/
+
+/* Checks whether the code points to an opcode that can take part in auto-
+possessification, and if so, fills a list with its properties.
+
+Arguments:
+ code points to start of expression
+ utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
+ fcc points to case-flipping table
+ list points to output list
+ list[0] will be filled with the opcode
+ list[1] will be non-zero if this opcode
+ can match an empty character string
+ list[2..7] depends on the opcode
+
+Returns: points to the start of the next opcode if *code is accepted
+ NULL if *code is not accepted
+*/
+
+static const pcre_uchar *
+get_chr_property_list(const pcre_uchar *code, BOOL utf,
+ const pcre_uint8 *fcc, pcre_uint32 *list)
+{
+pcre_uchar c = *code;
+pcre_uchar base;
+const pcre_uchar *end;
+pcre_uint32 chr;
+
+#ifdef SUPPORT_UCP
+pcre_uint32 *clist_dest;
+const pcre_uint32 *clist_src;
+#else
+utf = utf; /* Suppress "unused parameter" compiler warning */
+#endif
+
+list[0] = c;
+list[1] = FALSE;
+code++;
+
+if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
+ {
+ base = get_repeat_base(c);
+ c -= (base - OP_STAR);
+
+ if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
+ code += IMM2_SIZE;
+
+ list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && c != OP_POSPLUS);
+
+ switch(base)
+ {
+ case OP_STAR:
+ list[0] = OP_CHAR;
+ break;
+
+ case OP_STARI:
+ list[0] = OP_CHARI;
+ break;
+
+ case OP_NOTSTAR:
+ list[0] = OP_NOT;
+ break;
+
+ case OP_NOTSTARI:
+ list[0] = OP_NOTI;
+ break;
+
+ case OP_TYPESTAR:
+ list[0] = *code;
+ code++;
+ break;
+ }
+ c = list[0];
+ }
+
+switch(c)
+ {
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYNL:
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ case OP_EXTUNI:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_DOLL:
+ case OP_DOLLM:
+ return code;
+
+ case OP_CHAR:
+ case OP_NOT:
+ GETCHARINCTEST(chr, code);
+ list[2] = chr;
+ list[3] = NOTACHAR;
+ return code;
+
+ case OP_CHARI:
+ case OP_NOTI:
+ list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
+ GETCHARINCTEST(chr, code);
+ list[2] = chr;
+
+#ifdef SUPPORT_UCP
+ if (chr < 128 || (chr < 256 && !utf))
+ list[3] = fcc[chr];
+ else
+ list[3] = UCD_OTHERCASE(chr);
+#elif defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ list[3] = (chr < 256) ? fcc[chr] : chr;
+#else
+ list[3] = fcc[chr];
+#endif
+
+ /* The othercase might be the same value. */
+
+ if (chr == list[3])
+ list[3] = NOTACHAR;
+ else
+ list[4] = NOTACHAR;
+ return code;
+
+#ifdef SUPPORT_UCP
+ case OP_PROP:
+ case OP_NOTPROP:
+ if (code[0] != PT_CLIST)
+ {
+ list[2] = code[0];
+ list[3] = code[1];
+ return code + 2;
+ }
+
+ /* Convert only if we have enough space. */
+
+ clist_src = PRIV(ucd_caseless_sets) + code[1];
+ clist_dest = list + 2;
+ code += 2;
+
+ do {
+ if (clist_dest >= list + 8)
+ {
+ /* Early return if there is not enough space. This should never
+ happen, since all clists are shorter than 5 character now. */
+ list[2] = code[0];
+ list[3] = code[1];
+ return code;
+ }
+ *clist_dest++ = *clist_src;
+ }
+ while(*clist_src++ != NOTACHAR);
+
+ /* All characters are stored. The terminating NOTACHAR
+ is copied form the clist itself. */
+
+ list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
+ return code;
+#endif
+
+ case OP_NCLASS:
+ case OP_CLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ if (c == OP_XCLASS)
+ end = code + GET(code, 0) - 1;
+ else
+#endif
+ end = code + 32 / sizeof(pcre_uchar);
+
+ switch(*end)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
+ list[1] = TRUE;
+ end++;
+ break;
+
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
+ end++;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ list[1] = (GET2(end, 1) == 0);
+ end += 1 + 2 * IMM2_SIZE;
+ break;
+ }
+ list[2] = end - code;
+ return end;
+ }
+return NULL; /* Opcode not accepted */
+}
+
+
+
+/*************************************************
+* Scan further character sets for match *
+*************************************************/
+
+/* Checks whether the base and the current opcode have a common character, in
+which case the base cannot be possessified.
+
+Arguments:
+ code points to the byte code
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
+ cd static compile data
+ base_list the data list of the base opcode
+
+Returns: TRUE if the auto-possessification is possible
+*/
+
+static BOOL
+compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
+ const pcre_uint32 *base_list, const pcre_uchar *base_end)
+{
+pcre_uchar c;
+pcre_uint32 list[8];
+const pcre_uint32 *chr_ptr;
+const pcre_uint32 *ochr_ptr;
+const pcre_uint32 *list_ptr;
+const pcre_uchar *next_code;
+const pcre_uint8 *class_bitset;
+const pcre_uint32 *set1, *set2, *set_end;
+pcre_uint32 chr;
+BOOL accepted, invert_bits;
+
+/* Note: the base_list[1] contains whether the current opcode has greedy
+(represented by a non-zero value) quantifier. This is a different from
+other character type lists, which stores here that the character iterator
+matches to an empty string (also represented by a non-zero value). */
+
+for(;;)
+ {
+ /* All operations move the code pointer forward.
+ Therefore infinite recursions are not possible. */
+
+ c = *code;
+
+ /* Skip over callouts */
+
+ if (c == OP_CALLOUT)
+ {
+ code += PRIV(OP_lengths)[c];
+ continue;
+ }
+
+ if (c == OP_ALT)
+ {
+ do code += GET(code, 1); while (*code == OP_ALT);
+ c = *code;
+ }
+
+ switch(c)
+ {
+ case OP_END:
+ case OP_KETRPOS:
+ /* TRUE only in greedy case. The non-greedy case could be replaced by
+ an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
+ uses more memory, which we cannot get at this stage.) */
+
+ return base_list[1] != 0;
+
+ case OP_KET:
+ /* If the bracket is capturing, and referenced by an OP_RECURSE, or
+ it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
+ cannot be converted to a possessive form. */
+
+ if (base_list[1] == 0) return FALSE;
+
+ switch(*(code - GET(code, 1)))
+ {
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ONCE:
+ case OP_ONCE_NC:
+ /* Atomic sub-patterns and assertions can always auto-possessify their
+ last iterator. */
+ return TRUE;
+ }
+
+ code += PRIV(OP_lengths)[c];
+ continue;
+
+ case OP_ONCE:
+ case OP_ONCE_NC:
+ case OP_BRA:
+ case OP_CBRA:
+ next_code = code + GET(code, 1);
+ code += PRIV(OP_lengths)[c];
+
+ while (*next_code == OP_ALT)
+ {
+ if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
+ code = next_code + 1 + LINK_SIZE;
+ next_code += GET(next_code, 1);
+ }
+ continue;
+
+ case OP_BRAZERO:
+ case OP_BRAMINZERO:
+
+ next_code = code + 1;
+ if (*next_code != OP_BRA && *next_code != OP_CBRA
+ && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
+
+ do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
+
+ /* The bracket content will be checked by the
+ OP_BRA/OP_CBRA case above. */
+ next_code += 1 + LINK_SIZE;
+ if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
+ return FALSE;
+
+ code += PRIV(OP_lengths)[c];
+ continue;
+ }
+
+ /* Check for a supported opcode, and load its properties. */
+
+ code = get_chr_property_list(code, utf, cd->fcc, list);
+ if (code == NULL) return FALSE; /* Unsupported */
+
+ /* If either opcode is a small character list, set pointers for comparing
+ characters from that list with another list, or with a property. */
+
+ if (base_list[0] == OP_CHAR)
+ {
+ chr_ptr = base_list + 2;
+ list_ptr = list;
+ }
+ else if (list[0] == OP_CHAR)
+ {
+ chr_ptr = list + 2;
+ list_ptr = base_list;
+ }
+
+ /* Character bitsets can also be compared to certain opcodes. */
+
+ else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
+#ifdef COMPILE_PCRE8
+ /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
+ || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
+#endif
+ )
+ {
+#ifdef COMPILE_PCRE8
+ if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
+#else
+ if (base_list[0] == OP_CLASS)
+#endif
+ {
+ set1 = (pcre_uint32 *)(base_end - base_list[2]);
+ list_ptr = list;
+ }
+ else
+ {
+ set1 = (pcre_uint32 *)(code - list[2]);
+ list_ptr = base_list;
+ }
+
+ invert_bits = FALSE;
+ switch(list_ptr[0])
+ {
+ case OP_CLASS:
+ case OP_NCLASS:
+ set2 = (pcre_uint32 *)
+ ((list_ptr == list ? code : base_end) - list_ptr[2]);
+ break;
+
+ /* OP_XCLASS cannot be supported here, because its bitset
+ is not necessarily complete. E.g: [a-\0x{200}] is stored
+ as a character range, and the appropriate bits are not set. */
+
+ case OP_NOT_DIGIT:
+ invert_bits = TRUE;
+ /* Fall through */
+ case OP_DIGIT:
+ set2 = (pcre_uint32 *)(cd->cbits + cbit_digit);
+ break;
+
+ case OP_NOT_WHITESPACE:
+ invert_bits = TRUE;
+ /* Fall through */
+ case OP_WHITESPACE:
+ set2 = (pcre_uint32 *)(cd->cbits + cbit_space);
+ break;
+
+ case OP_NOT_WORDCHAR:
+ invert_bits = TRUE;
+ /* Fall through */
+ case OP_WORDCHAR:
+ set2 = (pcre_uint32 *)(cd->cbits + cbit_word);
+ break;
+
+ default:
+ return FALSE;
+ }
+
+ /* Compare 4 bytes to improve speed. */
+ set_end = set1 + (32 / 4);
+ if (invert_bits)
+ {
+ do
+ {
+ if ((*set1++ & ~(*set2++)) != 0) return FALSE;
+ }
+ while (set1 < set_end);
+ }
+ else
+ {
+ do
+ {
+ if ((*set1++ & *set2++) != 0) return FALSE;
+ }
+ while (set1 < set_end);
+ }
+
+ if (list[1] == 0) return TRUE;
+ /* Might be an empty repeat. */
+ continue;
+ }
+
+ /* Some property combinations also acceptable. Unicode property opcodes are
+ processed specially; the rest can be handled with a lookup table. */
+
+ else
+ {
+ pcre_uint32 leftop, rightop;
+
+ leftop = base_list[0];
+ rightop = list[0];
+
+#ifdef SUPPORT_UCP
+ accepted = FALSE; /* Always set in non-unicode case. */
+ if (leftop == OP_PROP || leftop == OP_NOTPROP)
+ {
+ if (rightop == OP_EOD)
+ accepted = TRUE;
+ else if (rightop == OP_PROP || rightop == OP_NOTPROP)
+ {
+ int n;
+ const pcre_uint8 *p;
+ BOOL same = leftop == rightop;
+ BOOL lisprop = leftop == OP_PROP;
+ BOOL risprop = rightop == OP_PROP;
+ BOOL bothprop = lisprop && risprop;
+
+ /* There's a table that specifies how each combination is to be
+ processed:
+ 0 Always return FALSE (never auto-possessify)
+ 1 Character groups are distinct (possessify if both are OP_PROP)
+ 2 Check character categories in the same group (general or particular)
+ 3 Return TRUE if the two opcodes are not the same
+ ... see comments below
+ */
+
+ n = propposstab[base_list[2]][list[2]];
+ switch(n)
+ {
+ case 0: break;
+ case 1: accepted = bothprop; break;
+ case 2: accepted = (base_list[3] == list[3]) != same; break;
+ case 3: accepted = !same; break;
+
+ case 4: /* Left general category, right particular category */
+ accepted = risprop && catposstab[base_list[3]][list[3]] == same;
+ break;
+
+ case 5: /* Right general category, left particular category */
+ accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
+ break;
+
+ /* This code is logically tricky. Think hard before fiddling with it.
+ The posspropstab table has four entries per row. Each row relates to
+ one of PCRE's special properties such as ALNUM or SPACE or WORD.
+ Only WORD actually needs all four entries, but using repeats for the
+ others means they can all use the same code below.
+
+ The first two entries in each row are Unicode general categories, and
+ apply always, because all the characters they include are part of the
+ PCRE character set. The third and fourth entries are a general and a
+ particular category, respectively, that include one or more relevant
+ characters. One or the other is used, depending on whether the check
+ is for a general or a particular category. However, in both cases the
+ category contains more characters than the specials that are defined
+ for the property being tested against. Therefore, it cannot be used
+ in a NOTPROP case.
+
+ Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
+ Underscore is covered by ucp_P or ucp_Po. */
+
+ case 6: /* Left alphanum vs right general category */
+ case 7: /* Left space vs right general category */
+ case 8: /* Left word vs right general category */
+ p = posspropstab[n-6];
+ accepted = risprop && lisprop ==
+ (list[3] != p[0] &&
+ list[3] != p[1] &&
+ (list[3] != p[2] || !lisprop));
+ break;
+
+ case 9: /* Right alphanum vs left general category */
+ case 10: /* Right space vs left general category */
+ case 11: /* Right word vs left general category */
+ p = posspropstab[n-9];
+ accepted = lisprop && risprop ==
+ (base_list[3] != p[0] &&
+ base_list[3] != p[1] &&
+ (base_list[3] != p[2] || !risprop));
+ break;
+
+ case 12: /* Left alphanum vs right particular category */
+ case 13: /* Left space vs right particular category */
+ case 14: /* Left word vs right particular category */
+ p = posspropstab[n-12];
+ accepted = risprop && lisprop ==
+ (catposstab[p[0]][list[3]] &&
+ catposstab[p[1]][list[3]] &&
+ (list[3] != p[3] || !lisprop));
+ break;
+
+ case 15: /* Right alphanum vs left particular category */
+ case 16: /* Right space vs left particular category */
+ case 17: /* Right word vs left particular category */
+ p = posspropstab[n-15];
+ accepted = lisprop && risprop ==
+ (catposstab[p[0]][base_list[3]] &&
+ catposstab[p[1]][base_list[3]] &&
+ (base_list[3] != p[3] || !risprop));
+ break;
+ }
+ }
+ }
+
+ else
+#endif /* SUPPORT_UCP */
+
+ accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
+ rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
+ autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
+
+ if (!accepted)
+ return FALSE;
+
+ if (list[1] == 0) return TRUE;
+ /* Might be an empty repeat. */
+ continue;
+ }
+
+ /* Control reaches here only if one of the items is a small character list.
+ All characters are checked against the other side. */
+
+ do
+ {
+ chr = *chr_ptr;
+
+ switch(list_ptr[0])
+ {
+ case OP_CHAR:
+ ochr_ptr = list_ptr + 2;
+ do
+ {
+ if (chr == *ochr_ptr) return FALSE;
+ ochr_ptr++;
+ }
+ while(*ochr_ptr != NOTACHAR);
+ break;
+
+ case OP_NOT:
+ ochr_ptr = list_ptr + 2;
+ do
+ {
+ if (chr == *ochr_ptr)
+ break;
+ ochr_ptr++;
+ }
+ while(*ochr_ptr != NOTACHAR);
+ if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
+ break;
+
+ /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not*
+ set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
+
+ case OP_DIGIT:
+ if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE;
+ break;
+
+ case OP_NOT_DIGIT:
+ if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE;
+ break;
+
+ case OP_WHITESPACE:
+ if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE;
+ break;
+
+ case OP_NOT_WHITESPACE:
+ if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE;
+ break;
+
+ case OP_WORDCHAR:
+ if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE;
+ break;
+
+ case OP_NOT_WORDCHAR:
+ if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE;
+ break;
+
+ case OP_HSPACE:
+ switch(chr)
+ {
+ HSPACE_CASES: return FALSE;
+ default: break;
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ switch(chr)
+ {
+ HSPACE_CASES: break;
+ default: return FALSE;
+ }
+ break;
+
+ case OP_ANYNL:
+ case OP_VSPACE:
+ switch(chr)
+ {
+ VSPACE_CASES: return FALSE;
+ default: break;
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ switch(chr)
+ {
+ VSPACE_CASES: break;
+ default: return FALSE;
+ }
+ break;
+
+ case OP_DOLL:
+ case OP_EODN:
+ switch (chr)
+ {
+ case CHAR_CR:
+ case CHAR_LF:
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#ifndef EBCDIC
+ case 0x2028:
+ case 0x2029:
+#endif /* Not EBCDIC */
+ return FALSE;
+ }
+ break;
+
+ case OP_EOD: /* Can always possessify before \z */
+ break;
+
+#ifdef SUPPORT_UCP
+ case OP_PROP:
+ case OP_NOTPROP:
+ if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
+ list_ptr[0] == OP_NOTPROP))
+ return FALSE;
+ break;
+#endif
+
+ case OP_NCLASS:
+ if (chr > 255) return FALSE;
+ /* Fall through */
+
+ case OP_CLASS:
+ if (chr > 255) break;
+ class_bitset = (pcre_uint8 *)
+ ((list_ptr == list ? code : base_end) - list_ptr[2]);
+ if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
+ break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
+ list_ptr[2] + LINK_SIZE, utf)) return FALSE;
+ break;
+#endif
+
+ default:
+ return FALSE;
+ }
+
+ chr_ptr++;
+ }
+ while(*chr_ptr != NOTACHAR);
+
+ /* At least one character must be matched from this opcode. */
+
+ if (list[1] == 0) return TRUE;
+ }
+
+return FALSE;
+}
+
+
+
+/*************************************************
+* Scan compiled regex for auto-possession *
+*************************************************/
+
+/* Replaces single character iterations with their possessive alternatives
+if appropriate. This function modifies the compiled opcode!
+
+Arguments:
+ code points to start of the byte code
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
+ cd static compile data
+
+Returns: nothing
+*/
+
+static void
+auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
+{
+register pcre_uchar c;
+const pcre_uchar *end;
+pcre_uchar *repeat_opcode;
+pcre_uint32 list[8];
+
+for (;;)
+ {
+ c = *code;
+
+ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
+ {
+ c -= get_repeat_base(c) - OP_STAR;
+ end = (c <= OP_MINUPTO) ?
+ get_chr_property_list(code, utf, cd->fcc, list) : NULL;
+ list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
+
+ if (end != NULL && compare_opcodes(end, utf, cd, list, end))
+ {
+ switch(c)
+ {
+ case OP_STAR:
+ *code += OP_POSSTAR - OP_STAR;
+ break;
+
+ case OP_MINSTAR:
+ *code += OP_POSSTAR - OP_MINSTAR;
+ break;
+
+ case OP_PLUS:
+ *code += OP_POSPLUS - OP_PLUS;
+ break;
+
+ case OP_MINPLUS:
+ *code += OP_POSPLUS - OP_MINPLUS;
+ break;
+
+ case OP_QUERY:
+ *code += OP_POSQUERY - OP_QUERY;
+ break;
+
+ case OP_MINQUERY:
+ *code += OP_POSQUERY - OP_MINQUERY;
+ break;
+
+ case OP_UPTO:
+ *code += OP_POSUPTO - OP_UPTO;
+ break;
+
+ case OP_MINUPTO:
+ *code += OP_MINUPTO - OP_UPTO;
+ break;
+ }
+ }
+ c = *code;
+ }
+ else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
+ {
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ if (c == OP_XCLASS)
+ repeat_opcode = code + GET(code, 1);
+ else
+#endif
+ repeat_opcode = code + 1 + (32 / sizeof(pcre_uchar));
+
+ c = *repeat_opcode;
+ if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
+ {
+ /* end must not be NULL. */
+ end = get_chr_property_list(code, utf, cd->fcc, list);
+
+ list[1] = (c & 1) == 0;
+
+ if (compare_opcodes(end, utf, cd, list, end))
+ {
+ switch (c)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ *repeat_opcode = OP_CRPOSSTAR;
+ break;
+
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ *repeat_opcode = OP_CRPOSPLUS;
+ break;
+
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ *repeat_opcode = OP_CRPOSQUERY;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ *repeat_opcode = OP_CRPOSRANGE;
+ break;
+ }
+ }
+ }
+ c = *code;
+ }
+
+ switch(c)
+ {
+ case OP_END:
+ return;
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSUPTO:
+ if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+ code += 2;
+ break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ code += GET(code, 1);
+ break;
+#endif
+
+ case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_SKIP_ARG:
+ case OP_THEN_ARG:
+ code += code[1];
+ break;
+ }
+
+ /* Add in the fixed length from the table */
+
+ code += PRIV(OP_lengths)[c];
+
+ /* In UTF-8 mode, opcodes that are followed by a character may be followed by
+ a multi-byte character. The length in the table is a minimum, so we have to
+ arrange to skip the extra bytes. */
+
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (utf) switch(c)
+ {
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_QUERY:
+ case OP_MINQUERY:
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_EXACT:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+ case OP_POSQUERY:
+ case OP_POSUPTO:
+ case OP_STARI:
+ case OP_MINSTARI:
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_QUERYI:
+ case OP_MINQUERYI:
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_EXACTI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+ case OP_POSQUERYI:
+ case OP_POSUPTOI:
+ case OP_NOTSTAR:
+ case OP_NOTMINSTAR:
+ case OP_NOTPLUS:
+ case OP_NOTMINPLUS:
+ case OP_NOTQUERY:
+ case OP_NOTMINQUERY:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTEXACT:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
+ case OP_NOTSTARI:
+ case OP_NOTMINSTARI:
+ case OP_NOTPLUSI:
+ case OP_NOTMINPLUSI:
+ case OP_NOTQUERYI:
+ case OP_NOTMINQUERYI:
+ case OP_NOTUPTOI:
+ case OP_NOTMINUPTOI:
+ case OP_NOTEXACTI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ case OP_NOTPOSQUERYI:
+ case OP_NOTPOSUPTOI:
+ if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
+ break;
+ }
+#else
+ (void)(utf); /* Keep compiler happy by referencing function argument */
+#endif
+ }
+}
+
+
+
+/*************************************************
* Check for POSIX class syntax *
*************************************************/
@@ -2692,7 +3813,7 @@ class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
below handles the special case of \], but does not try to do any other escape
processing. This makes it different from Perl for cases such as [:l\ower:]
where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
-"l\ower". This is a lesser evil that not diagnosing bad classes when Perl does,
+"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
I think.
A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
@@ -2954,476 +4075,11 @@ for (++c; c <= d; c++)
*cptr = c; /* Rest of input range */
return 0;
}
-
-
-
-/*************************************************
-* Check a character and a property *
-*************************************************/
-
-/* This function is called by check_auto_possessive() when a property item
-is adjacent to a fixed character.
-
-Arguments:
- c the character
- ptype the property type
- pdata the data for the type
- negated TRUE if it's a negated property (\P or \p{^)
-
-Returns: TRUE if auto-possessifying is OK
-*/
-
-static BOOL
-check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, BOOL negated)
-{
-#ifdef SUPPORT_UCP
-const pcre_uint32 *p;
-#endif
-
-const ucd_record *prop = GET_UCD(c);
-
-switch(ptype)
- {
- case PT_LAMP:
- return (prop->chartype == ucp_Lu ||
- prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == negated;
-
- case PT_GC:
- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
-
- case PT_PC:
- return (pdata == prop->chartype) == negated;
-
- case PT_SC:
- return (pdata == prop->script) == negated;
-
- /* These are specials */
-
- case PT_ALNUM:
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
-
- case PT_SPACE: /* Perl space */
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
- == negated;
-
- case PT_PXSPACE: /* POSIX space */
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR)
- == negated;
-
- case PT_WORD:
- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
- c == CHAR_UNDERSCORE) == negated;
-
-#ifdef SUPPORT_UCP
- case PT_CLIST:
- p = PRIV(ucd_caseless_sets) + prop->caseset;
- for (;;)
- {
- if (c < *p) return !negated;
- if (c == *p++) return negated;
- }
- break; /* Control never reaches here */
-#endif
- }
-
-return FALSE;
-}
#endif /* SUPPORT_UCP */
/*************************************************
-* Check if auto-possessifying is possible *
-*************************************************/
-
-/* This function is called for unlimited repeats of certain items, to see
-whether the next thing could possibly match the repeated item. If not, it makes
-sense to automatically possessify the repeated item.
-
-Arguments:
- previous pointer to the repeated opcode
- utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
- ptr next character in pattern
- options options bits
- cd contains pointers to tables etc.
-
-Returns: TRUE if possessifying is wanted
-*/
-
-static BOOL
-check_auto_possessive(const pcre_uchar *previous, BOOL utf,
- const pcre_uchar *ptr, int options, compile_data *cd)
-{
-pcre_uint32 c = NOTACHAR;
-pcre_uint32 next;
-int escape;
-pcre_uchar op_code = *previous++;
-
-/* Skip whitespace and comments in extended mode */
-
-if ((options & PCRE_EXTENDED) != 0)
- {
- for (;;)
- {
- while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
- if (*ptr == CHAR_NUMBER_SIGN)
- {
- ptr++;
- while (*ptr != CHAR_NULL)
- {
- if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
- ptr++;
-#ifdef SUPPORT_UTF
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
- }
- else break;
- }
- }
-
-/* If the next item is one that we can handle, get its value. A non-negative
-value is a character, a negative value is an escape value. */
-
-if (*ptr == CHAR_BACKSLASH)
- {
- int temperrorcode = 0;
- escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
- if (temperrorcode != 0) return FALSE;
- ptr++; /* Point after the escape sequence */
- }
-else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)
- {
- escape = 0;
-#ifdef SUPPORT_UTF
- if (utf) { GETCHARINC(next, ptr); } else
-#endif
- next = *ptr++;
- }
-else return FALSE;
-
-/* Skip whitespace and comments in extended mode */
-
-if ((options & PCRE_EXTENDED) != 0)
- {
- for (;;)
- {
- while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
- if (*ptr == CHAR_NUMBER_SIGN)
- {
- ptr++;
- while (*ptr != CHAR_NULL)
- {
- if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
- ptr++;
-#ifdef SUPPORT_UTF
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
- }
- else break;
- }
- }
-
-/* If the next thing is itself optional, we have to give up. */
-
-if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
- STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
- return FALSE;
-
-/* If the previous item is a character, get its value. */
-
-if (op_code == OP_CHAR || op_code == OP_CHARI ||
- op_code == OP_NOT || op_code == OP_NOTI)
- {
-#ifdef SUPPORT_UTF
- GETCHARTEST(c, previous);
-#else
- c = *previous;
-#endif
- }
-
-/* Now compare the next item with the previous opcode. First, handle cases when
-the next item is a character. */
-
-if (escape == 0)
- {
- /* For a caseless UTF match, the next character may have more than one other
- case, which maps to the special PT_CLIST property. Check this first. */
-
-#ifdef SUPPORT_UCP
- if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
- {
- unsigned int ocs = UCD_CASESET(next);
- if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
- }
-#endif
-
- switch(op_code)
- {
- case OP_CHAR:
- return c != next;
-
- /* For CHARI (caseless character) we must check the other case. If we have
- Unicode property support, we can use it to test the other case of
- high-valued characters. We know that next can have only one other case,
- because multi-other-case characters are dealt with above. */
-
- case OP_CHARI:
- if (c == next) return FALSE;
-#ifdef SUPPORT_UTF
- if (utf)
- {
- pcre_uint32 othercase;
- if (next < 128) othercase = cd->fcc[next]; else
-#ifdef SUPPORT_UCP
- othercase = UCD_OTHERCASE(next);
-#else
- othercase = NOTACHAR;
-#endif
- return c != othercase;
- }
- else
-#endif /* SUPPORT_UTF */
- return (c != TABLE_GET(next, cd->fcc, next)); /* Not UTF */
-
- case OP_NOT:
- return c == next;
-
- case OP_NOTI:
- if (c == next) return TRUE;
-#ifdef SUPPORT_UTF
- if (utf)
- {
- pcre_uint32 othercase;
- if (next < 128) othercase = cd->fcc[next]; else
-#ifdef SUPPORT_UCP
- othercase = UCD_OTHERCASE(next);
-#else
- othercase = NOTACHAR;
-#endif
- return c == othercase;
- }
- else
-#endif /* SUPPORT_UTF */
- return (c == TABLE_GET(next, cd->fcc, next)); /* Not UTF */
-
- /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
- When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
-
- case OP_DIGIT:
- return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
-
- case OP_NOT_DIGIT:
- return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
-
- case OP_WHITESPACE:
- return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
-
- case OP_NOT_WHITESPACE:
- return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
-
- case OP_WORDCHAR:
- return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
-
- case OP_NOT_WORDCHAR:
- return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
-
- case OP_HSPACE:
- case OP_NOT_HSPACE:
- switch(next)
- {
- HSPACE_CASES:
- return op_code == OP_NOT_HSPACE;
-
- default:
- return op_code != OP_NOT_HSPACE;
- }
-
- case OP_ANYNL:
- case OP_VSPACE:
- case OP_NOT_VSPACE:
- switch(next)
- {
- VSPACE_CASES:
- return op_code == OP_NOT_VSPACE;
-
- default:
- return op_code != OP_NOT_VSPACE;
- }
-
-#ifdef SUPPORT_UCP
- case OP_PROP:
- return check_char_prop(next, previous[0], previous[1], FALSE);
-
- case OP_NOTPROP:
- return check_char_prop(next, previous[0], previous[1], TRUE);
-#endif
-
- default:
- return FALSE;
- }
- }
-
-/* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
-is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
-generated only when PCRE_UCP is *not* set, that is, when only ASCII
-characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are
-replaced by OP_PROP codes when PCRE_UCP is set. */
-
-switch(op_code)
- {
- case OP_CHAR:
- case OP_CHARI:
- switch(escape)
- {
- case ESC_d:
- return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
-
- case ESC_D:
- return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
-
- case ESC_s:
- return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
-
- case ESC_S:
- return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
-
- case ESC_w:
- return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
-
- case ESC_W:
- return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
-
- case ESC_h:
- case ESC_H:
- switch(c)
- {
- HSPACE_CASES:
- return escape != ESC_h;
-
- default:
- return escape == ESC_h;
- }
-
- case ESC_v:
- case ESC_V:
- switch(c)
- {
- VSPACE_CASES:
- return escape != ESC_v;
-
- default:
- return escape == ESC_v;
- }
-
- /* When PCRE_UCP is set, these values get generated for \d etc. Find
- their substitutions and process them. The result will always be either
- ESC_p or ESC_P. Then fall through to process those values. */
-
-#ifdef SUPPORT_UCP
- case ESC_du:
- case ESC_DU:
- case ESC_wu:
- case ESC_WU:
- case ESC_su:
- case ESC_SU:
- {
- int temperrorcode = 0;
- ptr = substitutes[escape - ESC_DU];
- escape = check_escape(&ptr, &next, &temperrorcode, 0, options, FALSE);
- if (temperrorcode != 0) return FALSE;
- ptr++; /* For compatibility */
- }
- /* Fall through */
-
- case ESC_p:
- case ESC_P:
- {
- unsigned int ptype = 0, pdata = 0;
- int errorcodeptr;
- BOOL negated;
-
- ptr--; /* Make ptr point at the p or P */
- if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcodeptr))
- return FALSE;
- ptr++; /* Point past the final curly ket */
-
- /* If the property item is optional, we have to give up. (When generated
- from \d etc by PCRE_UCP, this test will have been applied much earlier,
- to the original \d etc. At this point, ptr will point to a zero byte. */
-
- if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
- STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
- return FALSE;
-
- /* Do the property check. */
-
- return check_char_prop(c, ptype, pdata, (escape == ESC_P) != negated);
- }
-#endif
-
- default:
- return FALSE;
- }
-
- /* In principle, support for Unicode properties should be integrated here as
- well. It means re-organizing the above code so as to get hold of the property
- values before switching on the op-code. However, I wonder how many patterns
- combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set,
- these op-codes are never generated.) */
-
- case OP_DIGIT:
- return escape == ESC_D || escape == ESC_s || escape == ESC_W ||
- escape == ESC_h || escape == ESC_v || escape == ESC_R;
-
- case OP_NOT_DIGIT:
- return escape == ESC_d;
-
- case OP_WHITESPACE:
- return escape == ESC_S || escape == ESC_d || escape == ESC_w;
-
- case OP_NOT_WHITESPACE:
- return escape == ESC_s || escape == ESC_h || escape == ESC_v || escape == ESC_R;
-
- case OP_HSPACE:
- return escape == ESC_S || escape == ESC_H || escape == ESC_d ||
- escape == ESC_w || escape == ESC_v || escape == ESC_R;
-
- case OP_NOT_HSPACE:
- return escape == ESC_h;
-
- /* Can't have \S in here because VT matches \S (Perl anomaly) */
- case OP_ANYNL:
- case OP_VSPACE:
- return escape == ESC_V || escape == ESC_d || escape == ESC_w;
-
- case OP_NOT_VSPACE:
- return escape == ESC_v || escape == ESC_R;
-
- case OP_WORDCHAR:
- return escape == ESC_W || escape == ESC_s || escape == ESC_h ||
- escape == ESC_v || escape == ESC_R;
-
- case OP_NOT_WORDCHAR:
- return escape == ESC_w || escape == ESC_d;
-
- default:
- return FALSE;
- }
-
-/* Control does not reach here */
-}
-
-
-
-/*************************************************
* Add a character or range to a class *
*************************************************/
@@ -3672,22 +4328,22 @@ to find out the amount of memory needed, as well as during the real compile
phase. The value of lengthptr distinguishes the two phases.
Arguments:
- optionsptr pointer to the option bits
- codeptr points to the pointer to the current code point
- ptrptr points to the current pattern pointer
- errorcodeptr points to error code variable
- firstcharptr place to put the first required character
+ optionsptr pointer to the option bits
+ codeptr points to the pointer to the current code point
+ ptrptr points to the current pattern pointer
+ errorcodeptr points to error code variable
+ firstcharptr place to put the first required character
firstcharflagsptr place to put the first character flags, or a negative number
- reqcharptr place to put the last required character
- reqcharflagsptr place to put the last required character flags, or a negative number
- bcptr points to current branch chain
- cond_depth conditional nesting depth
- cd contains pointers to tables etc.
- lengthptr NULL during the real compile phase
- points to length accumulator during pre-compile phase
-
-Returns: TRUE on success
- FALSE, with *errorcodeptr set non-zero on error
+ reqcharptr place to put the last required character
+ reqcharflagsptr place to put the last required character flags, or a negative number
+ bcptr points to current branch chain
+ cond_depth conditional nesting depth
+ cd contains pointers to tables etc.
+ lengthptr NULL during the real compile phase
+ points to length accumulator during pre-compile phase
+
+Returns: TRUE on success
+ FALSE, with *errorcodeptr set non-zero on error
*/
static BOOL
@@ -3910,58 +4566,67 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
}
+ /* Control does not reach here. */
}
- /* Fill in length of a previous callout, except when the next thing is
- a quantifier. */
-
- is_quantifier =
- c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
- (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
-
- if (!is_quantifier && previous_callout != NULL &&
- after_manual_callout-- <= 0)
- {
- if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
- complete_callout(previous_callout, ptr, cd);
- previous_callout = NULL;
- }
-
- /* In extended mode, skip white space and comments. */
+ /* In extended mode, skip white space and comments. We need a loop in order
+ to check for more white space and more comments after a comment. */
if ((options & PCRE_EXTENDED) != 0)
{
- if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue;
- if (c == CHAR_NUMBER_SIGN)
+ for (;;)
{
+ while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+ if (c != CHAR_NUMBER_SIGN) break;
ptr++;
while (*ptr != CHAR_NULL)
{
- if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
+ if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
+ { /* IS_NEWLINE sets cd->nllen. */
+ ptr += cd->nllen;
+ break;
+ }
ptr++;
#ifdef SUPPORT_UTF
if (utf) FORWARDCHAR(ptr);
#endif
}
- if (*ptr != CHAR_NULL) continue;
-
- /* Else fall through to handle end of string */
- c = 0;
+ c = *ptr; /* Either NULL or the char after a newline */
}
}
- /* No auto callout for quantifiers. */
+ /* See if the next thing is a quantifier. */
- if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)
+ is_quantifier =
+ c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
+ (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
+
+ /* Fill in length of a previous callout, except when the next thing is a
+ quantifier or when processing a property substitution string in UCP mode. */
+
+ if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
+ after_manual_callout-- <= 0)
+ {
+ if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
+ complete_callout(previous_callout, ptr, cd);
+ previous_callout = NULL;
+ }
+
+ /* Create auto callout, except for quantifiers, or while processing property
+ strings that are substituted for \w etc in UCP mode. */
+
+ if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
{
previous_callout = code;
code = auto_callout(code, ptr, cd);
}
+ /* Process the next pattern item. */
+
switch(c)
{
/* ===================================================================*/
- case 0: /* The branch terminates at string end */
+ case CHAR_NULL: /* The branch terminates at string end */
case CHAR_VERTICAL_LINE: /* or | or ) */
case CHAR_RIGHT_PARENTHESIS:
*firstcharptr = firstchar;
@@ -4039,7 +4704,29 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
+ /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
+ used for "start of word" and "end of word". As these are otherwise illegal
+ sequences, we don't break anything by recognizing them. They are replaced
+ by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are
+ erroneous and are handled by the normal code below. */
+
case CHAR_LEFT_SQUARE_BRACKET:
+ if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
+ {
+ nestptr = ptr + 7;
+ ptr = sub_start_of_word - 1;
+ continue;
+ }
+
+ if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
+ {
+ nestptr = ptr + 7;
+ ptr = sub_end_of_word - 1;
+ continue;
+ }
+
+ /* Handle a real character class. */
+
previous = code;
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if
@@ -4204,24 +4891,58 @@ for (;; ptr++)
posix_class = 0;
/* When PCRE_UCP is set, some of the POSIX classes are converted to
- different escape sequences that use Unicode properties. */
+ different escape sequences that use Unicode properties \p or \P. Others
+ that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
+ directly. */
#ifdef SUPPORT_UCP
if ((options & PCRE_UCP) != 0)
{
+ unsigned int ptype = 0;
int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
+
+ /* The posix_substitutes table specifies which POSIX classes can be
+ converted to \p or \P items. */
+
if (posix_substitutes[pc] != NULL)
{
nestptr = tempptr + 1;
ptr = posix_substitutes[pc] - 1;
continue;
}
+
+ /* There are three other classes that generate special property calls
+ that are recognized only in an XCLASS. */
+
+ else switch(posix_class)
+ {
+ case PC_GRAPH:
+ ptype = PT_PXGRAPH;
+ /* Fall through */
+ case PC_PRINT:
+ if (ptype == 0) ptype = PT_PXPRINT;
+ /* Fall through */
+ case PC_PUNCT:
+ if (ptype == 0) ptype = PT_PXPUNCT;
+ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+ *class_uchardata++ = ptype;
+ *class_uchardata++ = 0;
+ ptr = tempptr + 1;
+ continue;
+
+ /* For all other POSIX classes, no special action is taken in UCP
+ mode. Fall through to the non_UCP case. */
+
+ default:
+ break;
+ }
}
#endif
- /* In the non-UCP case, we build the bit map for the POSIX class in a
- chunk of local store because we may be adding and subtracting from it,
- and we don't want to subtract bits that may be in the main map already.
- At the end we or the result into the bit map that is being built. */
+ /* In the non-UCP case, or when UCP makes no difference, we build the
+ bit map for the POSIX class in a chunk of local store because we may be
+ adding and subtracting from it, and we don't want to subtract bits that
+ may be in the main map already. At the end we or the result into the
+ bit map that is being built. */
posix_class *= 3;
@@ -4277,14 +4998,12 @@ for (;; ptr++)
if (c == CHAR_BACKSLASH)
{
- escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
-
+ escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+ TRUE);
if (*errorcodeptr != 0) goto FAILED;
-
- if (escape == 0)
- c = ec;
+ if (escape == 0) c = ec;
else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
- else if (escape == ESC_N) /* \N is not supported in a class */
+ else if (escape == ESC_N) /* \N is not supported in a class */
{
*errorcodeptr = ERR71;
goto FAILED;
@@ -4340,21 +5059,20 @@ for (;; ptr++)
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
continue;
- /* Perl 5.004 onwards omits VT from \s, but we must preserve it
- if it was previously set by something earlier in the character
- class. Luckily, the value of CHAR_VT is 0x0b in both ASCII and
- EBCDIC, so we lazily just adjust the appropriate bit. */
+ /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
+ 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
+ previously set by something earlier in the character class.
+ Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
+ we could just adjust the appropriate bit. From PCRE 8.34 we no
+ longer treat \s and \S specially. */
case ESC_s:
- classbits[0] |= cbits[cbit_space];
- classbits[1] |= cbits[cbit_space+1] & ~0x08;
- for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
+ for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
continue;
case ESC_S:
should_flip_negation = TRUE;
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
- classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */
continue;
/* The rest apply in both UCP and non-UCP cases. */
@@ -4476,26 +5194,43 @@ for (;; ptr++)
#endif
d = *ptr; /* Not UTF-8 mode */
- /* The second part of a range can be a single-character escape, but
- not any of the other escapes. Perl 5.6 treats a hyphen as a literal
- in such circumstances. */
+ /* The second part of a range can be a single-character escape
+ sequence, but not any of the other escapes. Perl treats a hyphen as a
+ literal in such circumstances. However, in Perl's warning mode, a
+ warning is given, so PCRE now faults it as it is almost certainly a
+ mistake on the user's part. */
- if (!inescq && d == CHAR_BACKSLASH)
+ if (!inescq)
{
- int descape;
- descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
- if (*errorcodeptr != 0) goto FAILED;
+ if (d == CHAR_BACKSLASH)
+ {
+ int descape;
+ descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
+ if (*errorcodeptr != 0) goto FAILED;
- /* \b is backspace; any other special means the '-' was literal. */
+ /* 0 means a character was put into d; \b is backspace; any other
+ special causes an error. */
- if (descape != 0)
- {
- if (descape == ESC_b) d = CHAR_BS; else
+ if (descape != 0)
{
- ptr = oldptr;
- goto CLASS_SINGLE_CHARACTER; /* A few lines below */
+ if (descape == ESC_b) d = CHAR_BS; else
+ {
+ *errorcodeptr = ERR83;
+ goto FAILED;
+ }
}
}
+
+ /* A hyphen followed by a POSIX class is treated in the same way. */
+
+ else if (d == CHAR_LEFT_SQUARE_BRACKET &&
+ (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+ ptr[1] == CHAR_EQUALS_SIGN) &&
+ check_posix_syntax(ptr, &tempptr))
+ {
+ *errorcodeptr = ERR83;
+ goto FAILED;
+ }
}
/* Check that the two values are in the correct order. Optimize
@@ -4759,6 +5494,34 @@ for (;; ptr++)
tempcode = previous;
+ /* Before checking for a possessive quantifier, we must skip over
+ whitespace and comments in extended mode because Perl allows white space at
+ this point. */
+
+ if ((options & PCRE_EXTENDED) != 0)
+ {
+ const pcre_uchar *p = ptr + 1;
+ for (;;)
+ {
+ while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++;
+ if (*p != CHAR_NUMBER_SIGN) break;
+ p++;
+ while (*p != CHAR_NULL)
+ {
+ if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */
+ { /* IS_NEWLINE sets cd->nllen. */
+ p += cd->nllen;
+ break;
+ }
+ p++;
+#ifdef SUPPORT_UTF
+ if (utf) FORWARDCHAR(p);
+#endif
+ } /* Loop for comment characters */
+ } /* Loop for multiple comments */
+ ptr = p - 1; /* Character before the next significant one. */
+ }
+
/* If the next character is '+', we have a possessive quantifier. This
implies greediness, whatever the setting of the PCRE_UNGREEDY option.
If the next character is '?' this is a minimizing repeat, by default,
@@ -4853,19 +5616,6 @@ for (;; ptr++)
}
}
- /* If the repetition is unlimited, it pays to see if the next thing on
- the line is something that cannot possibly match this character. If so,
- automatically possessifying this item gains some performance in the case
- where the match fails. */
-
- if (!possessive_quantifier &&
- repeat_max < 0 &&
- check_auto_possessive(previous, utf, ptr + 1, options, cd))
- {
- repeat_type = 0; /* Force greedy */
- possessive_quantifier = TRUE;
- }
-
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
}
@@ -4883,14 +5633,6 @@ for (;; ptr++)
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
c = *previous;
- if (!possessive_quantifier &&
- repeat_max < 0 &&
- check_auto_possessive(previous, utf, ptr + 1, options, cd))
- {
- repeat_type = 0; /* Force greedy */
- possessive_quantifier = TRUE;
- }
-
OUTPUT_SINGLE_REPEAT:
if (*previous == OP_PROP || *previous == OP_NOTPROP)
{
@@ -4907,16 +5649,6 @@ for (;; ptr++)
if (repeat_max == 0) goto END_REPEAT;
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
- /* All real repeats make it impossible to handle partial matching (maybe
- one day we will be able to remove this restriction). */
-
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
-
/* Combine the op_type with the repeat_type */
repeat_type += op_type;
@@ -5049,13 +5781,12 @@ for (;; ptr++)
/* If previous was a character class or a back reference, we put the repeat
stuff after it, but just skip the item if the repeat was {0,0}. */
- else if (*previous == OP_CLASS ||
- *previous == OP_NCLASS ||
+ else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
*previous == OP_XCLASS ||
#endif
- *previous == OP_REF ||
- *previous == OP_REFI)
+ *previous == OP_REF || *previous == OP_REFI ||
+ *previous == OP_DNREF || *previous == OP_DNREFI)
{
if (repeat_max == 0)
{
@@ -5063,16 +5794,6 @@ for (;; ptr++)
goto END_REPEAT;
}
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
- /* All real repeats make it impossible to handle partial matching (maybe
- one day we will be able to remove this restriction). */
-
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
-
if (repeat_min == 0 && repeat_max == -1)
*code++ = OP_CRSTAR + repeat_type;
else if (repeat_min == 1 && repeat_max == -1)
@@ -5093,8 +5814,9 @@ for (;; ptr++)
opcodes such as BRA and CBRA, as this is the place where they get converted
into the more special varieties such as BRAPOS and SBRA. A test for >=
OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
- ASSERTBACK_NOT, ONCE, BRA, CBRA, and COND. Originally, PCRE did not allow
- repetition of assertions, but now it does, for Perl compatibility. */
+ ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND.
+ Originally, PCRE did not allow repetition of assertions, but now it does,
+ for Perl compatibility. */
else if (*previous >= OP_ASSERT && *previous <= OP_COND)
{
@@ -5112,7 +5834,7 @@ for (;; ptr++)
/* There is no sense in actually repeating assertions. The only potential
use of repetition is in cases when the assertion is optional. Therefore,
if the minimum is greater than zero, just ignore the repeat. If the
- maximum is not not zero or one, set it to 1. */
+ maximum is not zero or one, set it to 1. */
if (*previous < OP_ONCE) /* Assertion */
{
@@ -5415,7 +6137,7 @@ for (;; ptr++)
pcre_uchar *scode = bracode;
do
{
- if (could_be_empty_branch(scode, ketcode, utf, cd))
+ if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
{
*bracode += OP_SBRA - OP_BRA;
break;
@@ -5485,43 +6207,105 @@ for (;; ptr++)
goto FAILED;
}
- /* If the character following a repeat is '+', or if certain optimization
- tests above succeeded, possessive_quantifier is TRUE. For some opcodes,
- there are special alternative opcodes for this case. For anything else, we
- wrap the entire repeated item inside OP_ONCE brackets. Logically, the '+'
- notation is just syntactic sugar, taken from Sun's Java package, but the
- special opcodes can optimize it.
+ /* If the character following a repeat is '+', possessive_quantifier is
+ TRUE. For some opcodes, there are special alternative opcodes for this
+ case. For anything else, we wrap the entire repeated item inside OP_ONCE
+ brackets. Logically, the '+' notation is just syntactic sugar, taken from
+ Sun's Java package, but the special opcodes can optimize it.
Some (but not all) possessively repeated subpatterns have already been
completely handled in the code just above. For them, possessive_quantifier
- is always FALSE at this stage.
-
- Note that the repeated item starts at tempcode, not at previous, which
- might be the first part of a string whose (former) last char we repeated.
-
- Possessifying an 'exact' quantifier has no effect, so we can ignore it. But
- an 'upto' may follow. We skip over an 'exact' item, and then test the
- length of what remains before proceeding. */
+ is always FALSE at this stage. Note that the repeated item starts at
+ tempcode, not at previous, which might be the first part of a string whose
+ (former) last char we repeated. */
if (possessive_quantifier)
{
int len;
- if (*tempcode == OP_TYPEEXACT)
+ /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
+ However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
+ {5,}, or {5,10}). We skip over an EXACT item; if the length of what
+ remains is greater than zero, there's a further opcode that can be
+ handled. If not, do nothing, leaving the EXACT alone. */
+
+ switch(*tempcode)
+ {
+ case OP_TYPEEXACT:
tempcode += PRIV(OP_lengths)[*tempcode] +
((tempcode[1 + IMM2_SIZE] == OP_PROP
|| tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
+ break;
- else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
- {
+ /* CHAR opcodes are used for exacts whose count is 1. */
+
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_EXACT:
+ case OP_EXACTI:
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
tempcode += PRIV(OP_lengths)[*tempcode];
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(tempcode[-1]))
tempcode += GET_EXTRALEN(tempcode[-1]);
#endif
+ break;
+
+ /* For the class opcodes, the repeat operator appears at the end;
+ adjust tempcode to point to it. */
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ tempcode += 1 + 32/sizeof(pcre_uchar);
+ break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ tempcode += GET(tempcode, 1);
+ break;
+#endif
}
+ /* If tempcode is equal to code (which points to the end of the repeated
+ item), it means we have skipped an EXACT item but there is no following
+ QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
+ all other cases, tempcode will be pointing to the repeat opcode, and will
+ be less than code, so the value of len will be greater than 0. */
+
len = (int)(code - tempcode);
+ if (len > 0)
+ {
+ unsigned int repcode = *tempcode;
+
+ /* There is a table for possessifying opcodes, all of which are less
+ than OP_CALLOUT. A zero entry means there is no possessified version.
+ */
+
+ if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
+ *tempcode = opcode_possessify[repcode];
+
+ /* For opcode without a special possessified version, wrap the item in
+ ONCE brackets. Because we are moving code along, we must ensure that any
+ pending recursive references are updated. */
+
+ else
+ {
+ *code = OP_END;
+ adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+ memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
+ code += 1 + LINK_SIZE;
+ len += 1 + LINK_SIZE;
+ tempcode[0] = OP_ONCE;
+ *code++ = OP_KET;
+ PUTINC(code, 0, len);
+ PUT(tempcode, 1, len);
+ }
+ }
+
+#ifdef NEVER
if (len > 0) switch (*tempcode)
{
case OP_STAR: *tempcode = OP_POSSTAR; break;
@@ -5549,6 +6333,11 @@ for (;; ptr++)
case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break;
+ case OP_CRSTAR: *tempcode = OP_CRPOSSTAR; break;
+ case OP_CRPLUS: *tempcode = OP_CRPOSPLUS; break;
+ case OP_CRQUERY: *tempcode = OP_CRPOSQUERY; break;
+ case OP_CRRANGE: *tempcode = OP_CRPOSRANGE; break;
+
/* Because we are moving code along, we must ensure that any
pending recursive references are updated. */
@@ -5564,6 +6353,7 @@ for (;; ptr++)
PUT(tempcode, 1, len);
break;
}
+#endif
}
/* In all case we no longer have a previous item. We also set the
@@ -5749,30 +6539,44 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
- group), a name (referring to a named group), or 'R', referring to
- recursion. R<digits> and R&name are also permitted for recursion tests.
+ group's having been set), a name (referring to a named group), or 'R',
+ referring to recursion. R<digits> and R&name are also permitted for
+ recursion tests.
+
+ There are ways of testing a named group: (?(name)) is used by Python;
+ Perl 5.10 onwards uses (?(<name>) or (?('name')).
- There are several syntaxes for testing a named group: (?(name)) is used
- by Python; Perl 5.10 onwards uses (?(<name>) or (?('name')).
+ There is one unfortunate ambiguity, caused by history. 'R' can be the
+ recursive thing or the name 'R' (and similarly for 'R' followed by
+ digits). We look for a name first; if not found, we try the other case.
- There are two unfortunate ambiguities, caused by history. (a) 'R' can
- be the recursive thing or the name 'R' (and similarly for 'R' followed
- by digits), and (b) a number could be a name that consists of digits.
- In both cases, we look for a name first; if not found, we try the other
- cases. */
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
+ used to check for an assertion condition. That's all that is needed! */
+
+ if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
+ {
+ for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
+ if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
- ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
+ (tempptr[2] == CHAR_EQUALS_SIGN ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
- /* Most other conditions use OP_CREF (a couple change to OP_RREF
- below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */
+ /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
+ need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
code[1+LINK_SIZE] = OP_CREF;
skipbytes = 1+IMM2_SIZE;
@@ -5780,7 +6584,8 @@ for (;; ptr++)
/* Check for a test for recursion in a named group. */
- if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
+ ptr++;
+ if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
{
terminator = -1;
ptr += 2;
@@ -5788,14 +6593,15 @@ for (;; ptr++)
}
/* Check for a test for a named group's having been set, using the Perl
- syntax (?(<name>) or (?('name') */
+ syntax (?(<name>) or (?('name'), and also allow for the original PCRE
+ syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
- else if (ptr[1] == CHAR_LESS_THAN_SIGN)
+ else if (*ptr == CHAR_LESS_THAN_SIGN)
{
terminator = CHAR_GREATER_THAN_SIGN;
ptr++;
}
- else if (ptr[1] == CHAR_APOSTROPHE)
+ else if (*ptr == CHAR_APOSTROPHE)
{
terminator = CHAR_APOSTROPHE;
ptr++;
@@ -5803,35 +6609,55 @@ for (;; ptr++)
else
{
terminator = CHAR_NULL;
- if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
+ if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
+ else if (IS_DIGIT(*ptr)) refsign = 0;
}
- /* We now expect to read a name; any thing else is an error */
+ /* Handle a number */
- if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)
+ if (refsign >= 0)
{
- ptr += 1; /* To get the right offset */
- *errorcodeptr = ERR28;
- goto FAILED;
+ recno = 0;
+ while (IS_DIGIT(*ptr))
+ {
+ recno = recno * 10 + (int)(*ptr - CHAR_0);
+ ptr++;
+ }
}
- /* Read the name, but also get it as a number if it's all digits */
+ /* Otherwise we expect to read a name; anything else is an error. When
+ a name is one of a number of duplicates, a different opcode is used and
+ it needs more memory. Unfortunately we cannot tell whether a name is a
+ duplicate in the first pass, so we have to allow for more memory. */
- recno = 0;
- name = ++ptr;
- while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
+ else
{
- if (recno >= 0)
- recno = (IS_DIGIT(*ptr))? recno * 10 + (int)(*ptr - CHAR_0) : -1;
- ptr++;
+ if (IS_DIGIT(*ptr))
+ {
+ *errorcodeptr = ERR84;
+ goto FAILED;
+ }
+ if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)
+ {
+ *errorcodeptr = ERR28; /* Assertion expected */
+ goto FAILED;
+ }
+ name = ptr++;
+ while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
+ {
+ ptr++;
+ }
+ namelen = (int)(ptr - name);
+ if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
}
- namelen = (int)(ptr - name);
+
+ /* Check the terminator */
if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
*ptr++ != CHAR_RIGHT_PARENTHESIS)
{
- ptr--; /* Error offset */
- *errorcodeptr = ERR26;
+ ptr--; /* Error offset */
+ *errorcodeptr = ERR26; /* Malformed number or name */
goto FAILED;
}
@@ -5840,18 +6666,18 @@ for (;; ptr++)
if (lengthptr != NULL) break;
/* In the real compile we do the work of looking for the actual
- reference. If the string started with "+" or "-" we require the rest to
- be digits, in which case recno will be set. */
+ reference. If refsign is not negative, it means we have a number in
+ recno. */
- if (refsign > 0)
+ if (refsign >= 0)
{
if (recno <= 0)
{
- *errorcodeptr = ERR58;
+ *errorcodeptr = ERR35;
goto FAILED;
}
- recno = (refsign == CHAR_MINUS)?
- cd->bracount - recno + 1 : recno +cd->bracount;
+ if (refsign != 0) recno = (refsign == CHAR_MINUS)?
+ cd->bracount - recno + 1 : recno + cd->bracount;
if (recno <= 0 || recno > cd->final_bracount)
{
*errorcodeptr = ERR15;
@@ -5861,11 +6687,7 @@ for (;; ptr++)
break;
}
- /* Otherwise (did not start with "+" or "-"), start by looking for the
- name. If we find a name, add one to the opcode to change OP_CREF or
- OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
- except they record that the reference was originally to a name. The
- information is used to check duplicate names. */
+ /* Otherwise look for the name. */
slot = cd->name_table;
for (i = 0; i < cd->names_found; i++)
@@ -5874,29 +6696,40 @@ for (;; ptr++)
slot += cd->name_entry_size;
}
- /* Found a previous named subpattern */
+ /* Found the named subpattern. If the name is duplicated, add one to
+ the opcode to change CREF/RREF into DNCREF/DNRREF and insert
+ appropriate data values. Otherwise, just insert the unique subpattern
+ number. */
if (i < cd->names_found)
{
- recno = GET2(slot, 0);
- PUT2(code, 2+LINK_SIZE, recno);
- code[1+LINK_SIZE]++;
- }
-
- /* Search the pattern for a forward reference */
-
- else if ((i = find_parens(cd, name, namelen,
- (options & PCRE_EXTENDED) != 0, utf)) > 0)
- {
- PUT2(code, 2+LINK_SIZE, i);
- code[1+LINK_SIZE]++;
+ int offset = i++;
+ int count = 1;
+ recno = GET2(slot, 0); /* Number from first found */
+ for (; i < cd->names_found; i++)
+ {
+ slot += cd->name_entry_size;
+ if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;
+ count++;
+ }
+ if (count > 1)
+ {
+ PUT2(code, 2+LINK_SIZE, offset);
+ PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
+ skipbytes += IMM2_SIZE;
+ code[1+LINK_SIZE]++;
+ }
+ else /* Not a duplicated name */
+ {
+ PUT2(code, 2+LINK_SIZE, recno);
+ }
}
/* If terminator == CHAR_NULL it means that the name followed directly
after the opening parenthesis [e.g. (?(abc)...] and in this case there
are some further alternatives to try. For the cases where terminator !=
- 0 [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have
- now checked all the possibilities, so give an error. */
+ CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
+ we have now checked all the possibilities, so give an error. */
else if (terminator != CHAR_NULL)
{
@@ -5933,19 +6766,11 @@ for (;; ptr++)
skipbytes = 1;
}
- /* Check for the "name" actually being a subpattern number. We are
- in the second pass here, so final_bracount is set. */
-
- else if (recno > 0 && recno <= cd->final_bracount)
- {
- PUT2(code, 2+LINK_SIZE, recno);
- }
-
- /* Either an unidentified subpattern, or a reference to (?(0) */
+ /* Reference to an unidentified subpattern. */
else
{
- *errorcodeptr = (recno == 0)? ERR35: ERR15;
+ *errorcodeptr = ERR15;
goto FAILED;
}
break;
@@ -5958,11 +6783,18 @@ for (;; ptr++)
ptr++;
break;
+ /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
+ thing to do, but Perl allows all assertions to be quantified, and when
+ they contain capturing parentheses there may be a potential use for
+ this feature. Not that that applies to a quantified (?!) but we allow
+ it for uniformity. */
/* ------------------------------------------------------------ */
case CHAR_EXCLAMATION_MARK: /* Negative lookahead */
ptr++;
- if (*ptr == CHAR_RIGHT_PARENTHESIS) /* Optimize (?!) */
+ if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
+ ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
+ (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
{
*code++ = OP_FAIL;
previous = NULL;
@@ -6055,124 +6887,110 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
DEFINE_NAME: /* Come here from (?< handling */
case CHAR_APOSTROPHE:
+ terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
+ CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
+ name = ++ptr;
+ if (IS_DIGIT(*ptr))
{
- terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
- CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
- name = ++ptr;
+ *errorcodeptr = ERR84; /* Group name must start with non-digit */
+ goto FAILED;
+ }
+ while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
+ namelen = (int)(ptr - name);
- while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
- namelen = (int)(ptr - name);
+ /* In the pre-compile phase, do a syntax check, remember the longest
+ name, and then remember the group in a vector, expanding it if
+ necessary. Duplicates for the same number are skipped; other duplicates
+ are checked for validity. In the actual compile, there is nothing to
+ do. */
+
+ if (lengthptr != NULL)
+ {
+ named_group *ng;
+ pcre_uint32 number = cd->bracount + 1;
- /* In the pre-compile phase, just do a syntax check. */
+ if (*ptr != (pcre_uchar)terminator)
+ {
+ *errorcodeptr = ERR42;
+ goto FAILED;
+ }
- if (lengthptr != NULL)
+ if (cd->names_found >= MAX_NAME_COUNT)
{
- if (*ptr != (pcre_uchar)terminator)
- {
- *errorcodeptr = ERR42;
- goto FAILED;
- }
- if (cd->names_found >= MAX_NAME_COUNT)
+ *errorcodeptr = ERR49;
+ goto FAILED;
+ }
+
+ if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
+ {
+ cd->name_entry_size = namelen + IMM2_SIZE + 1;
+ if (namelen > MAX_NAME_SIZE)
{
- *errorcodeptr = ERR49;
+ *errorcodeptr = ERR48;
goto FAILED;
}
- if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
+ }
+
+ /* Scan the list to check for duplicates. For duplicate names, if the
+ number is the same, break the loop, which causes the name to be
+ discarded; otherwise, if DUPNAMES is not set, give an error.
+ If it is set, allow the name with a different number, but continue
+ scanning in case this is a duplicate with the same number. For
+ non-duplicate names, give an error if the number is duplicated. */
+
+ ng = cd->named_groups;
+ for (i = 0; i < cd->names_found; i++, ng++)
+ {
+ if (namelen == ng->length &&
+ STRNCMP_UC_UC(name, ng->name, namelen) == 0)
{
- cd->name_entry_size = namelen + IMM2_SIZE + 1;
- if (namelen > MAX_NAME_SIZE)
+ if (ng->number == number) break;
+ if ((options & PCRE_DUPNAMES) == 0)
{
- *errorcodeptr = ERR48;
+ *errorcodeptr = ERR43;
goto FAILED;
}
+ cd->dupnames = TRUE; /* Duplicate names exist */
+ }
+ else if (ng->number == number)
+ {
+ *errorcodeptr = ERR65;
+ goto FAILED;
}
}
- /* In the real compile, create the entry in the table, maintaining
- alphabetical order. Duplicate names for different numbers are
- permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
- number are always OK. (An existing number can be re-used if (?|
- appears in the pattern.) In either event, a duplicate name results in
- a duplicate entry in the table, even if the number is the same. This
- is because the number of names, and hence the table size, is computed
- in the pre-compile, and it affects various numbers and pointers which
- would all have to be modified, and the compiled code moved down, if
- duplicates with the same number were omitted from the table. This
- doesn't seem worth the hassle. However, *different* names for the
- same number are not permitted. */
-
- else
+ if (i >= cd->names_found) /* Not a duplicate with same number */
{
- BOOL dupname = FALSE;
- slot = cd->name_table;
+ /* Increase the list size if necessary */
- for (i = 0; i < cd->names_found; i++)
+ if (cd->names_found >= cd->named_group_list_size)
{
- int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen));
- if (crc == 0)
- {
- if (slot[IMM2_SIZE+namelen] == 0)
- {
- if (GET2(slot, 0) != cd->bracount + 1 &&
- (options & PCRE_DUPNAMES) == 0)
- {
- *errorcodeptr = ERR43;
- goto FAILED;
- }
- else dupname = TRUE;
- }
- else crc = -1; /* Current name is a substring */
- }
-
- /* Make space in the table and break the loop for an earlier
- name. For a duplicate or later name, carry on. We do this for
- duplicates so that in the simple case (when ?(| is not used) they
- are in order of their numbers. */
+ int newsize = cd->named_group_list_size * 2;
+ named_group *newspace = (PUBL(malloc))
+ (newsize * sizeof(named_group));
- if (crc < 0)
+ if (newspace == NULL)
{
- memmove(slot + cd->name_entry_size, slot,
- IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
- break;
+ *errorcodeptr = ERR21;
+ goto FAILED;
}
- /* Continue the loop for a later or duplicate name */
-
- slot += cd->name_entry_size;
- }
-
- /* For non-duplicate names, check for a duplicate number before
- adding the new name. */
-
- if (!dupname)
- {
- pcre_uchar *cslot = cd->name_table;
- for (i = 0; i < cd->names_found; i++)
- {
- if (cslot != slot)
- {
- if (GET2(cslot, 0) == cd->bracount + 1)
- {
- *errorcodeptr = ERR65;
- goto FAILED;
- }
- }
- else i--;
- cslot += cd->name_entry_size;
- }
+ memcpy(newspace, cd->named_groups,
+ cd->named_group_list_size * sizeof(named_group));
+ if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
+ (PUBL(free))((void *)cd->named_groups);
+ cd->named_groups = newspace;
+ cd->named_group_list_size = newsize;
}
- PUT2(slot, 0, cd->bracount + 1);
- memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen));
- slot[IMM2_SIZE + namelen] = 0;
+ cd->named_groups[cd->names_found].name = name;
+ cd->named_groups[cd->names_found].length = namelen;
+ cd->named_groups[cd->names_found].number = number;
+ cd->names_found++;
}
}
- /* In both pre-compile and compile, count the number of names we've
- encountered. */
-
- cd->names_found++;
- ptr++; /* Move past > or ' */
+ ptr++; /* Move past > or ' in both passes. */
goto NUMBERED_GROUP;
@@ -6190,6 +7008,11 @@ for (;; ptr++)
NAMED_REF_OR_RECURSE:
name = ++ptr;
+ if (IS_DIGIT(*ptr))
+ {
+ *errorcodeptr = ERR84; /* Group name must start with non-digit */
+ goto FAILED;
+ }
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
namelen = (int)(ptr - name);
@@ -6202,7 +7025,7 @@ for (;; ptr++)
if (lengthptr != NULL)
{
- const pcre_uchar *temp;
+ named_group *ng;
if (namelen == 0)
{
@@ -6220,27 +7043,29 @@ for (;; ptr++)
goto FAILED;
}
- /* The name table does not exist in the first pass, so we cannot
- do a simple search as in the code below. Instead, we have to scan the
- pattern to find the number. It is important that we scan it only as
- far as we have got because the syntax of named subpatterns has not
- been checked for the rest of the pattern, and find_parens() assumes
- correct syntax. In any case, it's a waste of resources to scan
- further. We stop the scan at the current point by temporarily
- adjusting the value of cd->endpattern. */
-
- temp = cd->end_pattern;
- cd->end_pattern = ptr;
- recno = find_parens(cd, name, namelen,
- (options & PCRE_EXTENDED) != 0, utf);
- cd->end_pattern = temp;
- if (recno < 0) recno = 0; /* Forward ref; set dummy number */
+ /* The name table does not exist in the first pass; instead we must
+ scan the list of names encountered so far in order to get the
+ number. If the name is not found, set the value to 0 for a forward
+ reference. */
+
+ ng = cd->named_groups;
+ for (i = 0; i < cd->names_found; i++, ng++)
+ {
+ if (namelen == ng->length &&
+ STRNCMP_UC_UC(name, ng->name, namelen) == 0)
+ break;
+ }
+ recno = (i < cd->names_found)? ng->number : 0;
+
+ /* Count named back references. */
+
+ if (!is_recurse) cd->namedrefcount++;
}
- /* In the real compile, seek the name in the table. We check the name
+ /* In the real compile, search the name table. We check the name
first, and then check that we have reached the end of the name in the
- table. That way, if the name that is longer than any in the table,
- the comparison will fail without reading beyond the table entry. */
+ table. That way, if the name is longer than any in the table, the
+ comparison will fail without reading beyond the table entry. */
else
{
@@ -6253,24 +7078,76 @@ for (;; ptr++)
slot += cd->name_entry_size;
}
- if (i < cd->names_found) /* Back reference */
+ if (i < cd->names_found)
{
recno = GET2(slot, 0);
}
- else if ((recno = /* Forward back reference */
- find_parens(cd, name, namelen,
- (options & PCRE_EXTENDED) != 0, utf)) <= 0)
+ else
{
*errorcodeptr = ERR15;
goto FAILED;
}
}
- /* In both phases, we can now go to the code than handles numerical
- recursion or backreferences. */
+ /* In both phases, for recursions, we can now go to the code than
+ handles numerical recursion. */
if (is_recurse) goto HANDLE_RECURSION;
- else goto HANDLE_REFERENCE;
+
+ /* In the second pass we must see if the name is duplicated. If so, we
+ generate a different opcode. */
+
+ if (lengthptr == NULL && cd->dupnames)
+ {
+ int count = 1;
+ unsigned int index = i;
+ pcre_uchar *cslot = slot + cd->name_entry_size;
+
+ for (i++; i < cd->names_found; i++)
+ {
+ if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
+ count++;
+ cslot += cd->name_entry_size;
+ }
+
+ if (count > 1)
+ {
+ if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
+ previous = code;
+ *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
+ PUT2INC(code, 0, index);
+ PUT2INC(code, 0, count);
+
+ /* Process each potentially referenced group. */
+
+ for (; slot < cslot; slot += cd->name_entry_size)
+ {
+ open_capitem *oc;
+ recno = GET2(slot, 0);
+ cd->backref_map |= (recno < 32)? (1 << recno) : 1;
+ if (recno > cd->top_backref) cd->top_backref = recno;
+
+ /* Check to see if this back reference is recursive, that it, it
+ is inside the group that it references. A flag is set so that the
+ group can be made atomic. */
+
+ for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == recno)
+ {
+ oc->flag = TRUE;
+ break;
+ }
+ }
+ }
+
+ continue; /* End of back ref handling */
+ }
+ }
+
+ /* First pass, or a non-duplicated name. */
+
+ goto HANDLE_REFERENCE;
/* ------------------------------------------------------------ */
@@ -6369,8 +7246,7 @@ for (;; ptr++)
if (called == NULL)
{
- if (find_parens(cd, NULL, recno,
- (options & PCRE_EXTENDED) != 0, utf) < 0)
+ if (recno > cd->final_bracount)
{
*errorcodeptr = ERR15;
goto FAILED;
@@ -6529,10 +7405,19 @@ for (;; ptr++)
skipbytes = IMM2_SIZE;
}
- /* Process nested bracketed regex. Assertions used not to be repeatable,
- but this was changed for Perl compatibility, so all kinds can now be
- repeated. We copy code into a non-register variable (tempcode) in order to
- be able to pass its address because some compilers complain otherwise. */
+ /* Process nested bracketed regex. First check for parentheses nested too
+ deeply. */
+
+ if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
+ {
+ *errorcodeptr = ERR82;
+ goto FAILED;
+ }
+
+ /* Assertions used not to be repeatable, but this was changed for Perl
+ compatibility, so all kinds can now be repeated. We copy code into a
+ non-register variable (tempcode) in order to be able to pass its address
+ because some compilers complain otherwise. */
previous = code; /* For handling repetition */
*code = bravalue;
@@ -6563,6 +7448,8 @@ for (;; ptr++)
))
goto FAILED;
+ cd->parens_depth -= 1;
+
/* If this was an atomic group and there are no capturing groups within it,
generate OP_ONCE_NC instead of OP_ONCE. */
@@ -6738,10 +7625,9 @@ for (;; ptr++)
case CHAR_BACKSLASH:
tempptr = ptr;
escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
-
if (*errorcodeptr != 0) goto FAILED;
- if (escape == 0)
+ if (escape == 0) /* The escape coded a single character */
c = ec;
else
{
@@ -6778,44 +7664,31 @@ for (;; ptr++)
if (escape == ESC_g)
{
const pcre_uchar *p;
+ pcre_uint32 cf;
+
save_hwm = cd->hwm; /* Normally this is set when '(' is read */
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
/* These two statements stop the compiler for warning about possibly
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
- fact, because we actually check for a number below, the paths that
+ fact, because we do the check for a number below, the paths that
would actually be in error are never taken. */
skipbytes = 0;
reset_bracount = FALSE;
- /* Test for a name */
+ /* If it's not a signed or unsigned number, treat it as a name. */
- if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
+ cf = ptr[1];
+ if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
{
- BOOL is_a_number = TRUE;
- for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++)
- {
- if (!MAX_255(*p)) { is_a_number = FALSE; break; }
- if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
- if ((cd->ctypes[*p] & ctype_word) == 0) break;
- }
- if (*p != (pcre_uchar)terminator)
- {
- *errorcodeptr = ERR57;
- break;
- }
- if (is_a_number)
- {
- ptr++;
- goto HANDLE_NUMERICAL_RECURSION;
- }
is_recurse = TRUE;
goto NAMED_REF_OR_RECURSE;
}
- /* Test a signed number in angle brackets or quotes. */
+ /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
+ or a digit. */
p = ptr + 2;
while (IS_DIGIT(*p)) p++;
@@ -6855,7 +7728,10 @@ for (;; ptr++)
open_capitem *oc;
recno = -escape;
- HANDLE_REFERENCE: /* Come here from named backref handling */
+ /* Come here from named backref handling when the reference is to a
+ single group (i.e. not to a duplicated name. */
+
+ HANDLE_REFERENCE:
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
previous = code;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
@@ -6907,11 +7783,12 @@ for (;; ptr++)
can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. Note that \b and \B do a one-character
- lookbehind. */
+ lookbehind, and \A also behaves as if it does. */
else
{
- if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
+ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+ cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
if (escape >= ESC_DU && escape <= ESC_wu)
@@ -6951,8 +7828,8 @@ for (;; ptr++)
/* ===================================================================*/
/* Handle a literal character. It is guaranteed not to be whitespace or #
- when the extended flag is set. If we are in UTF-8 mode, it may be a
- multi-byte literal character. */
+ when the extended flag is set. If we are in a UTF mode, it may be a
+ multi-unit literal character. */
default:
NORMAL_CHAR:
@@ -6983,7 +7860,8 @@ for (;; ptr++)
*code++ = OP_PROP;
*code++ = PT_CLIST;
*code++ = c;
- if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;
+ if (firstcharflags == REQ_UNSET)
+ firstcharflags = zerofirstcharflags = REQ_NONE;
break;
}
}
@@ -7072,24 +7950,24 @@ out the amount of memory needed, as well as during the real compile phase. The
value of lengthptr distinguishes the two phases.
Arguments:
- options option bits, including any changes for this subpattern
- codeptr -> the address of the current code pointer
- ptrptr -> the address of the current pattern pointer
- errorcodeptr -> pointer to error code variable
- lookbehind TRUE if this is a lookbehind assertion
- reset_bracount TRUE to reset the count for each branch
- skipbytes skip this many bytes at start (for brackets and OP_COND)
- cond_depth depth of nesting for conditional subpatterns
- firstcharptr place to put the first required character
+ options option bits, including any changes for this subpattern
+ codeptr -> the address of the current code pointer
+ ptrptr -> the address of the current pattern pointer
+ errorcodeptr -> pointer to error code variable
+ lookbehind TRUE if this is a lookbehind assertion
+ reset_bracount TRUE to reset the count for each branch
+ skipbytes skip this many bytes at start (for brackets and OP_COND)
+ cond_depth depth of nesting for conditional subpatterns
+ firstcharptr place to put the first required character
firstcharflagsptr place to put the first character flags, or a negative number
- reqcharptr place to put the last required character
- reqcharflagsptr place to put the last required character flags, or a negative number
- bcptr pointer to the chain of currently open branches
- cd points to the data block with tables pointers etc.
- lengthptr NULL during the real compile phase
- points to length accumulator during pre-compile phase
-
-Returns: TRUE on success
+ reqcharptr place to put the last required character
+ reqcharflagsptr place to put the last required character flags, or a negative number
+ bcptr pointer to the chain of currently open branches
+ cd points to the data block with tables pointers etc.
+ lengthptr NULL during the real compile phase
+ points to length accumulator during pre-compile phase
+
+Returns: TRUE on success
*/
static BOOL
@@ -7540,9 +8418,9 @@ do {
switch (*scode)
{
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
return FALSE;
@@ -7626,13 +8504,14 @@ return TRUE;
discarded, because they can cause conflicts with actual literals that follow.
However, if we end up without a first char setting for an unanchored pattern,
it is worth scanning the regex to see if there is an initial asserted first
-char. If all branches start with the same asserted char, or with a bracket all
-of whose alternatives start with the same asserted char (recurse ad lib), then
-we return that char, otherwise -1.
+char. If all branches start with the same asserted char, or with a
+non-conditional bracket all of whose alternatives start with the same asserted
+char (recurse ad lib), then we return that char, with the flags set to zero or
+REQ_CASELESS; otherwise return zero with REQ_NONE in the flags.
Arguments:
code points to start of expression (the bracket)
- flags points to the first char flags, or to REQ_NONE
+ flags points to the first char flags, or to REQ_NONE
inassert TRUE if in an assertion
Returns: the fixed first char, or 0 with REQ_NONE in flags
@@ -7669,7 +8548,6 @@ do {
case OP_ASSERT:
case OP_ONCE:
case OP_ONCE_NC:
- case OP_COND:
d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
if (dflags < 0)
return 0;
@@ -7714,6 +8592,61 @@ return c;
/*************************************************
+* Add an entry to the name/number table *
+*************************************************/
+
+/* This function is called between compiling passes to add an entry to the
+name/number table, maintaining alphabetical order. Checking for permitted
+and forbidden duplicates has already been done.
+
+Arguments:
+ cd the compile data block
+ name the name to add
+ length the length of the name
+ groupno the group number
+
+Returns: nothing
+*/
+
+static void
+add_name(compile_data *cd, const pcre_uchar *name, int length,
+ unsigned int groupno)
+{
+int i;
+pcre_uchar *slot = cd->name_table;
+
+for (i = 0; i < cd->names_found; i++)
+ {
+ int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length));
+ if (crc == 0 && slot[IMM2_SIZE+length] != 0)
+ crc = -1; /* Current name is a substring */
+
+ /* Make space in the table and break the loop for an earlier name. For a
+ duplicate or later name, carry on. We do this for duplicates so that in the
+ simple case (when ?(| is not used) they are in order of their numbers. In all
+ cases they are in the order in which they appear in the pattern. */
+
+ if (crc < 0)
+ {
+ memmove(slot + cd->name_entry_size, slot,
+ IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
+ break;
+ }
+
+ /* Continue the loop for a later or duplicate name */
+
+ slot += cd->name_entry_size;
+ }
+
+PUT2(slot, 0, groupno);
+memcpy(slot + IMM2_SIZE, name, IN_UCHARS(length));
+slot[IMM2_SIZE + length] = 0;
+cd->names_found++;
+}
+
+
+
+/*************************************************
* Compile a Regular Expression *
*************************************************/
@@ -7775,12 +8708,15 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
{
REAL_PCRE *re;
int length = 1; /* For final END opcode */
-pcre_uint32 firstchar, reqchar;
pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 firstchar, reqchar;
+pcre_uint32 limit_match = PCRE_UINT32_MAX;
+pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
int newline;
int errorcode = 0;
int skipatstart = 0;
BOOL utf;
+BOOL never_utf = FALSE;
size_t size;
pcre_uchar *code;
const pcre_uchar *codestart;
@@ -7797,6 +8733,11 @@ new memory is obtained from malloc(). */
pcre_uchar cworkspace[COMPILE_WORK_SIZE];
+/* This vector is used for remembering name groups during the pre-compile. In a
+similar way to cworkspace, it can be expanded using malloc() if necessary. */
+
+named_group named_groups[NAMED_GROUP_LIST_SIZE];
+
/* Set this early so that early errors get offset 0. */
ptr = (const pcre_uchar *)pattern;
@@ -7840,9 +8781,15 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
goto PCRE_EARLY_ERROR_RETURN;
}
+/* If PCRE_NEVER_UTF is set, remember it. */
+
+if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
+
/* Check for global one-time settings at the start of the pattern, and remember
the offset for later. */
+cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */
+
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
{
@@ -7870,9 +8817,49 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
{ skipatstart += 6; options |= PCRE_UTF8; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
{ skipatstart += 6; options |= PCRE_UCP; continue; }
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_AUTO_POSSESS_RIGHTPAR, 16) == 0)
+ { skipatstart += 18; options |= PCRE_NO_AUTO_POSSESS; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
{ skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 14;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_match)
+ {
+ limit_match = c;
+ cd->external_flags |= PCRE_MLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 18;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_recursion)
+ {
+ limit_recursion = c;
+ cd->external_flags |= PCRE_RLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
@@ -7898,6 +8885,11 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
+if (utf && never_utf)
+ {
+ errorcode = ERR78;
+ goto PCRE_EARLY_ERROR_RETURN2;
+ }
/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
@@ -8010,17 +9002,21 @@ cd->bracount = cd->final_bracount = 0;
cd->names_found = 0;
cd->name_entry_size = 0;
cd->name_table = NULL;
+cd->dupnames = FALSE;
+cd->namedrefcount = 0;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
cd->start_workspace = cworkspace;
cd->workspace_size = COMPILE_WORK_SIZE;
+cd->named_groups = named_groups;
+cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
cd->start_pattern = (const pcre_uchar *)pattern;
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
cd->req_varyopt = 0;
+cd->parens_depth = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
-cd->external_flags = 0;
cd->open_caps = NULL;
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
@@ -8032,6 +9028,7 @@ outside can help speed up starting point checks. */
ptr += skipatstart;
code = cworkspace;
*code = OP_BRA;
+
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
cd, &length);
@@ -8046,14 +9043,23 @@ if (length > MAX_PATTERN_SIZE)
goto PCRE_EARLY_ERROR_RETURN;
}
-/* Compute the size of data block needed and get it, either from malloc or
-externally provided function. Integer overflow should no longer be possible
-because nowadays we limit the maximum value of cd->names_found and
-cd->name_entry_size. */
+/* If there are groups with duplicate names and there are also references by
+name, we must allow for the possibility of named references to duplicated
+groups. These require an extra data item each. */
-size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
-re = (REAL_PCRE *)(PUBL(malloc))(size);
+if (cd->dupnames && cd->namedrefcount > 0)
+ length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);
+
+/* Compute the size of the data block for storing the compiled pattern. Integer
+overflow should no longer be possible because nowadays we limit the maximum
+value of cd->names_found and cd->name_entry_size. */
+
+size = sizeof(REAL_PCRE) +
+ (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
+
+/* Get the memory. */
+re = (REAL_PCRE *)(PUBL(malloc))(size);
if (re == NULL)
{
errorcode = ERR21;
@@ -8070,6 +9076,8 @@ re->magic_number = MAGIC_NUMBER;
re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
+re->limit_match = limit_match;
+re->limit_recursion = limit_recursion;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@@ -8079,7 +9087,9 @@ re->ref_count = 0;
re->tables = (tables == PRIV(default_tables))? NULL : tables;
re->nullpad = NULL;
#ifdef COMPILE_PCRE32
-re->dummy1 = re->dummy2 = 0;
+re->dummy = 0;
+#else
+re->dummy1 = re->dummy2 = re->dummy3 = 0;
#endif
/* The starting points of the name/number translation table and of the code are
@@ -8090,10 +9100,10 @@ field; this time it's used for remembering forward references to subpatterns.
*/
cd->final_bracount = cd->bracount; /* Save for checking forward references */
+cd->parens_depth = 0;
cd->assert_depth = 0;
cd->bracount = 0;
cd->max_lookbehind = 0;
-cd->names_found = 0;
cd->name_table = (pcre_uchar *)re + re->name_table_offset;
codestart = cd->name_table + re->name_entry_size * re->name_count;
cd->start_code = codestart;
@@ -8104,6 +9114,20 @@ cd->had_pruneorskip = FALSE;
cd->check_lookbehind = FALSE;
cd->open_caps = NULL;
+/* If any named groups were found, create the name/number table from the list
+created in the first pass. */
+
+if (cd->names_found > 0)
+ {
+ int i = cd->names_found;
+ named_group *ng = cd->named_groups;
+ cd->names_found = 0;
+ for (; i > 0; i--, ng++)
+ add_name(cd, ng->name, ng->length, ng->number);
+ if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
+ (PUBL(free))((void *)cd->named_groups);
+ }
+
/* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result
of the function here. */
@@ -8139,7 +9163,7 @@ if (code - codestart > length) errorcode = ERR23;
#ifdef SUPPORT_VALGRIND
/* If the estimated length exceeds the really used length, mark the extra
-allocated memory as unadressable, so that any out-of-bound reads can be
+allocated memory as unaddressable, so that any out-of-bound reads can be
detected. */
VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
#endif
@@ -8167,16 +9191,24 @@ if (cd->hwm > cd->start_workspace)
}
}
-/* If the workspace had to be expanded, free the new memory. */
+/* If the workspace had to be expanded, free the new memory. Set the pointer to
+NULL to indicate that forward references have been filled in. */
if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
+cd->start_workspace = NULL;
/* Give an error if there's back reference to a non-existent capturing
subpattern. */
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
+/* Unless disabled, check whether single character iterators can be
+auto-possessified. The function overwrites the appropriate opcode values. */
+
+if ((options & PCRE_NO_AUTO_POSSESS) == 0)
+ auto_possessify((pcre_uchar *)codestart, utf, cd);
+
/* If there were any lookbehind assertions that contained OP_RECURSE
(recursions or subroutine calls), a flag is set for them to be checked here,
because they may contain forward references. Actual recursions cannot be fixed
@@ -8374,6 +9406,20 @@ if (code - codestart > length)
}
#endif /* PCRE_DEBUG */
+/* Check for a pattern than can match an empty string, so that this information
+can be provided to applications. */
+
+do
+ {
+ if (could_be_empty_branch(codestart, code, utf, cd, NULL))
+ {
+ re->flags |= PCRE_MATCH_EMPTY;
+ break;
+ }
+ codestart += GET(codestart, 1);
+ }
+while (*codestart == OP_ALT);
+
#if defined COMPILE_PCRE8
return (pcre *)re;
#elif defined COMPILE_PCRE16
@@ -8384,3 +9430,4 @@ return (pcre32 *)re;
}
/* End of pcre_compile.c */
+
diff --git a/src/3rdparty/pcre/pcre_config.c b/src/3rdparty/pcre/pcre_config.c
index db46c77a74..0ae23fdc9b 100644
--- a/src/3rdparty/pcre/pcre_config.c
+++ b/src/3rdparty/pcre/pcre_config.c
@@ -161,6 +161,10 @@ switch (what)
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
+ case PCRE_CONFIG_PARENS_LIMIT:
+ *((unsigned long int *)where) = PARENS_NEST_LIMIT;
+ break;
+
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
break;
diff --git a/src/3rdparty/pcre/pcre_dfa_exec.c b/src/3rdparty/pcre/pcre_dfa_exec.c
index adb1bbf3f5..243309789e 100644
--- a/src/3rdparty/pcre/pcre_dfa_exec.c
+++ b/src/3rdparty/pcre/pcre_dfa_exec.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -120,7 +120,7 @@ static const pcre_uint8 coptable[] = {
0, 0, /* \P, \p */
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
0, /* \X */
- 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
+ 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@@ -151,11 +151,14 @@ static const pcre_uint8 coptable[] = {
/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */
+ 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
0, /* CLASS */
0, /* NCLASS */
0, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
+ 0, /* DNREF */
+ 0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@@ -171,8 +174,8 @@ static const pcre_uint8 coptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
- 0, 0, /* CREF, NCREF */
- 0, 0, /* RREF, NRREF */
+ 0, 0, /* CREF, DNCREF */
+ 0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -194,7 +197,7 @@ static const pcre_uint8 poptable[] = {
1, 1, /* \P, \p */
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
1, /* \X */
- 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
+ 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@@ -220,11 +223,14 @@ static const pcre_uint8 poptable[] = {
/* Character class & ref repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
1, 1, /* CRRANGE, CRMINRANGE */
+ 1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
1, /* CLASS */
1, /* NCLASS */
1, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
+ 0, /* DNREF */
+ 0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@@ -240,8 +246,8 @@ static const pcre_uint8 poptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
- 0, 0, /* CREF, NCREF */
- 0, 0, /* RREF, NRREF */
+ 0, 0, /* CREF, DNCREF */
+ 0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -636,7 +642,7 @@ for (;;)
const pcre_uchar *code;
int state_offset = current_state->offset;
int codevalue, rrc;
- unsigned int count;
+ int count;
#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -1094,15 +1100,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
- case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR;
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ OK = TRUE;
+ break;
+
+ default:
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
+ break;
+ }
break;
case PT_WORD:
@@ -1120,6 +1134,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1249,7 +1269,7 @@ for (;;)
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1283,7 +1303,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1338,15 +1358,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
- case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR;
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ OK = TRUE;
+ break;
+
+ default:
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
+ break;
+ }
break;
case PT_WORD:
@@ -1364,6 +1392,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1576,15 +1610,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
- case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR;
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ OK = TRUE;
+ break;
+
+ default:
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
+ break;
+ }
break;
case PT_WORD:
@@ -1602,6 +1644,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1705,7 +1753,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- ADD_NEW_DATA(-(state_offset + count), 0, ncount);
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;
default:
@@ -1749,7 +1797,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- ADD_NEW_DATA(-(state_offset + count), 0, 0);
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@@ -1790,7 +1838,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- ADD_NEW_DATA(-(state_offset + count), 0, 0);
+ ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@@ -1839,15 +1887,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
- case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR;
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ OK = TRUE;
+ break;
+
+ default:
+ OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
+ break;
+ }
break;
case PT_WORD:
@@ -1865,6 +1921,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1879,7 +1941,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1918,7 +1980,7 @@ for (;;)
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1960,7 +2022,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -2000,7 +2062,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2037,7 +2099,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2407,7 +2469,7 @@ for (;;)
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -2456,7 +2518,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= GET2(code, 1))
+ if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -2509,31 +2571,65 @@ for (;;)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
ADD_ACTIVE(next_state_offset + 1, 0);
- if (isinclass) { ADD_NEW(state_offset, 0); }
+ if (isinclass)
+ {
+ if (*ecode == OP_CRPOSSTAR)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset, 0);
+ }
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
- if (isinclass) { count++; ADD_NEW(state_offset, count); }
+ if (isinclass)
+ {
+ if (count > 0 && *ecode == OP_CRPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW(state_offset, count);
+ }
break;
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
ADD_ACTIVE(next_state_offset + 1, 0);
- if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
+ if (isinclass)
+ {
+ if (*ecode == OP_CRPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(next_state_offset + 1, 0);
+ }
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
count = current_state->count; /* Already matched */
- if (count >= GET2(ecode, 1))
+ if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)
{
- unsigned int max = GET2(ecode, 1 + IMM2_SIZE);
+ int max = (int)GET2(ecode, 1 + IMM2_SIZE);
+ if (*ecode == OP_CRPOSRANGE)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
@@ -2633,9 +2729,11 @@ for (;;)
condcode = code[LINK_SIZE+1];
- /* Back reference conditions are not supported */
+ /* Back reference conditions and duplicate named recursion conditions
+ are not supported */
- if (condcode == OP_CREF || condcode == OP_NCREF)
+ if (condcode == OP_CREF || condcode == OP_DNCREF ||
+ condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
/* The DEFINE condition is always false */
@@ -2647,7 +2745,7 @@ for (;;)
which means "test if in any recursion". We can't test for specifically
recursed groups. */
- else if (condcode == OP_RREF || condcode == OP_NRREF)
+ else if (condcode == OP_RREF)
{
int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
@@ -3023,15 +3121,7 @@ for (;;)
ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
- {
- if (offsetcount >= 2)
- {
- offsets[0] = (int)(md->start_used_ptr - start_subject);
- offsets[1] = (int)(end_subject - start_subject);
- }
match_count = PCRE_ERROR_PARTIAL;
- }
-
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));
@@ -3545,7 +3635,17 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
- if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
+ if (rc != PCRE_ERROR_NOMATCH || anchored)
+ {
+ if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
+ {
+ offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
+ offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+ if (offsetcount > 2)
+ offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
+ }
+ return rc;
+ }
/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */
diff --git a/src/3rdparty/pcre/pcre_exec.c b/src/3rdparty/pcre/pcre_exec.c
index c888468a25..913521ff0c 100644
--- a/src/3rdparty/pcre/pcre_exec.c
+++ b/src/3rdparty/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,20 @@ possible. There are also some static supporting functions. */
#undef min
#undef max
+/* The md->capture_last field uses the lower 16 bits for the last captured
+substring (which can never be greater than 65535) and a bit in the top half
+to mean "capture vector overflowed". This odd way of doing things was
+implemented when it was realized that preserving and restoring the overflow bit
+whenever the last capture number was saved/restored made for a neater
+interface, and doing it this way saved on (a) another variable, which would
+have increased the stack frame size (a big NO-NO in PCRE) and (b) another
+separate set of save/restore instructions. The following defines are used in
+implementing this. */
+
+#define CAPLMASK 0x0000ffff /* The bits used for last_capture */
+#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
+#define OVFLBIT 0x00010000 /* The bit that is set for overflow */
+
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. */
@@ -73,13 +87,17 @@ defined PCRE_ERROR_xxx codes, which are all negative. */
negative to avoid the external error codes. */
#define MATCH_ACCEPT (-999)
-#define MATCH_COMMIT (-998)
-#define MATCH_KETRPOS (-997)
-#define MATCH_ONCE (-996)
+#define MATCH_KETRPOS (-998)
+#define MATCH_ONCE (-997)
+/* The next 5 must be kept together and in sequence so that a test that checks
+for any one of them can use a range. */
+#define MATCH_COMMIT (-996)
#define MATCH_PRUNE (-995)
#define MATCH_SKIP (-994)
#define MATCH_SKIP_ARG (-993)
#define MATCH_THEN (-992)
+#define MATCH_BACKTRACK_MAX MATCH_THEN
+#define MATCH_BACKTRACK_MIN MATCH_COMMIT
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
@@ -89,8 +107,8 @@ because the offset vector is always a multiple of 3 long. */
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
-static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
-static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
+static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
+static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
#ifdef PCRE_DEBUG
/*************************************************
@@ -149,7 +167,7 @@ match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
{
PCRE_PUCHAR eptr_start = eptr;
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
BOOL utf = md->utf;
#endif
@@ -177,8 +195,7 @@ ASCII characters. */
if (caseless)
{
-#ifdef SUPPORT_UTF
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (utf)
{
/* Match characters up to the end of the reference. NOTE: the number of
@@ -212,14 +229,13 @@ if (caseless)
}
else
#endif
-#endif
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */
{
while (length-- > 0)
{
- pcre_uchar cc, cp;
+ pcre_uint32 cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
cc = RAWUCHARTEST(eptr);
cp = RAWUCHARTEST(p);
@@ -416,10 +432,10 @@ typedef struct heapframe {
int Xlength;
int Xmax;
int Xmin;
- int Xnumber;
+ unsigned int Xnumber;
int Xoffset;
- int Xop;
- int Xsave_capture_last;
+ unsigned int Xop;
+ pcre_int32 Xsave_capture_last;
int Xsave_offset1, Xsave_offset2, Xsave_offset3;
int Xstacksave[REC_STACK_SAVE_MAX];
@@ -634,8 +650,8 @@ int max;
int min;
unsigned int number;
int offset;
-pcre_uchar op;
-int save_capture_last;
+unsigned int op;
+pcre_int32 save_capture_last;
int save_offset1, save_offset2, save_offset3;
int stacksave[REC_STACK_SAVE_MAX];
@@ -763,23 +779,16 @@ for (;;)
case OP_FAIL:
RRETURN(MATCH_NOMATCH);
- /* COMMIT overrides PRUNE, SKIP, and THEN */
-
case OP_COMMIT:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM52);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
- rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
- rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_COMMIT);
- /* PRUNE overrides THEN */
-
case OP_PRUNE:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM51);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
case OP_PRUNE_ARG:
@@ -789,38 +798,39 @@ for (;;)
eptrb, RM56);
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
- /* SKIP overrides PRUNE and THEN */
-
case OP_SKIP:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM53);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->start_match_ptr = eptr; /* Pass back current position */
RRETURN(MATCH_SKIP);
/* Note that, for Perl compatibility, SKIP with an argument does NOT set
- nomatch_mark. There is a flag that disables this opcode when re-matching a
- pattern that ended with a SKIP for which there was not a matching MARK. */
+ nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
+ not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+ that failed and any that precede it (either they also failed, or were not
+ triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
+ SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
+ set to the count of the one that failed. */
case OP_SKIP_ARG:
- if (md->ignore_skip_arg)
+ md->skip_arg_count++;
+ if (md->skip_arg_count <= md->ignore_skip_arg)
{
ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
break;
}
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
eptrb, RM57);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
/* Pass back the current skip name by overloading md->start_match_ptr and
returning the special MATCH_SKIP_ARG return code. This will either be
caught by a matching MARK, or get to the top, where it causes a rematch
- with the md->ignore_skip_arg flag set. */
+ with md->ignore_skip_arg set to the value of md->skip_arg_count. */
md->start_match_ptr = ecode + 2;
RRETURN(MATCH_SKIP_ARG);
@@ -1066,6 +1076,7 @@ for (;;)
/* In all other cases, we have to make another call to match(). */
save_mark = md->mark;
+ save_capture_last = md->capture_last;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
RM2);
@@ -1097,6 +1108,7 @@ for (;;)
ecode += GET(ecode, 1);
md->mark = save_mark;
if (*ecode != OP_ALT) break;
+ md->capture_last = save_capture_last;
}
RRETURN(MATCH_NOMATCH);
@@ -1159,6 +1171,7 @@ for (;;)
ecode = md->start_code + code_offset;
save_capture_last = md->capture_last;
matched_once = TRUE;
+ mstart = md->start_match_ptr; /* In case \K changed it */
continue;
}
@@ -1218,6 +1231,7 @@ for (;;)
POSSESSIVE_NON_CAPTURE:
matched_once = FALSE;
code_offset = (int)(ecode - md->start_code);
+ save_capture_last = md->capture_last;
for (;;)
{
@@ -1230,6 +1244,7 @@ for (;;)
eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
matched_once = TRUE;
+ mstart = md->start_match_ptr; /* In case \K reset it */
continue;
}
@@ -1247,6 +1262,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
+ md->capture_last = save_capture_last;
}
if (matched_once || allow_zero)
@@ -1258,25 +1274,32 @@ for (;;)
/* Control never reaches here. */
- /* Conditional group: compilation checked that there are no more than
- two branches. If the condition is false, skipping the first branch takes us
- past the end if there is only one branch, but that's OK because that is
- exactly what going to the ket would do. */
+ /* Conditional group: compilation checked that there are no more than two
+ branches. If the condition is false, skipping the first branch takes us
+ past the end of the item if there is only one branch, but that's exactly
+ what we want. */
case OP_COND:
case OP_SCOND:
- codelink = GET(ecode, 1);
+
+ /* The variable codelink will be added to ecode when the condition is
+ false, to get to the second branch. Setting it to the offset to the ALT
+ or KET, then incrementing ecode achieves this effect. We now have ecode
+ pointing to the condition or callout. */
+
+ codelink = GET(ecode, 1); /* Offset to the second branch */
+ ecode += 1 + LINK_SIZE; /* From this opcode */
/* Because of the way auto-callout works during compile, a callout item is
inserted between OP_COND and an assertion condition. */
- if (ecode[LINK_SIZE+1] == OP_CALLOUT)
+ if (*ecode == OP_CALLOUT)
{
if (PUBL(callout) != NULL)
{
PUBL(callout_block) cb;
cb.version = 2; /* Version 1 of the callout block */
- cb.callout_number = ecode[LINK_SIZE+2];
+ cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;
#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
@@ -1288,215 +1311,130 @@ for (;;)
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
cb.current_position = (int)(eptr - md->start_subject);
- cb.pattern_position = GET(ecode, LINK_SIZE + 3);
- cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+ cb.pattern_position = GET(ecode, 2);
+ cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
+ cb.capture_last = md->capture_last & CAPLMASK;
+ /* Internal change requires this for API compatibility. */
+ if (cb.capture_last == 0) cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = md->nomatch_mark;
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
}
+
+ /* Advance ecode past the callout, so it now points to the condition. We
+ must adjust codelink so that the value of ecode+codelink is unchanged. */
+
ecode += PRIV(OP_lengths)[OP_CALLOUT];
+ codelink -= PRIV(OP_lengths)[OP_CALLOUT];
}
- condcode = ecode[LINK_SIZE+1];
+ /* Test the various possible conditions */
- /* Now see what the actual condition is */
-
- if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
+ condition = FALSE;
+ switch(condcode = *ecode)
{
- if (md->recursive == NULL) /* Not recursing => FALSE */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- else
+ case OP_RREF: /* Numbered group recursion test */
+ if (md->recursive != NULL) /* Not recursing => FALSE */
{
- unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
+ unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
+ }
+ break;
- /* If the test is for recursion into a specific subpattern, and it is
- false, but the test was set up by name, scan the table to see if the
- name refers to any other numbers, and test them. The condition is true
- if any one is set. */
-
- if (!condition && condcode == OP_NRREF)
+ case OP_DNRREF: /* Duplicate named group recursion test */
+ if (md->recursive != NULL)
+ {
+ int count = GET2(ecode, 1 + IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ while (count-- > 0)
{
- pcre_uchar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == recno) break;
- slotA += md->name_entry_size;
- }
-
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
-
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- }
- }
+ unsigned int recno = GET2(slot, 0);
+ condition = recno == md->recursive->group_num;
+ if (condition) break;
+ slot += md->name_entry_size;
}
-
- /* Chose branch according to the condition */
-
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
}
- }
+ break;
- else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
- {
- offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
+ case OP_CREF: /* Numbered group used test */
+ offset = GET2(ecode, 1) << 1; /* Doubled ref number */
condition = offset < offset_top && md->offset_vector[offset] >= 0;
+ break;
- /* If the numbered capture is unset, but the reference was by name,
- scan the table to see if the name refers to any other numbers, and test
- them. The condition is true if any one is set. This is tediously similar
- to the code above, but not close enough to try to amalgamate. */
-
- if (!condition && condcode == OP_NCREF)
+ case OP_DNCREF: /* Duplicate named group used test */
{
- unsigned int refno = offset >> 1;
- pcre_uchar *slotA = md->name_table;
-
- for (i = 0; i < md->name_count; i++)
+ int count = GET2(ecode, 1 + IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ while (count-- > 0)
{
- if (GET2(slotA, 0) == refno) break;
- slotA += md->name_entry_size;
- }
-
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
-
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- }
+ offset = GET2(slot, 0) << 1;
+ condition = offset < offset_top && md->offset_vector[offset] >= 0;
+ if (condition) break;
+ slot += md->name_entry_size;
}
}
+ break;
- /* Chose branch according to the condition */
-
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
- }
-
- else if (condcode == OP_DEF) /* DEFINE - always false */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
+ case OP_DEF: /* DEFINE - always false */
+ break;
- /* The condition is an assertion. Call match() to evaluate it - setting
- md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
- an assertion. */
+ /* The condition is an assertion. Call match() to evaluate it - setting
+ md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
+ of an assertion. */
- else
- {
+ default:
md->match_function_type = MATCH_CONDASSERT;
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
+ RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
if (rrc == MATCH_MATCH)
{
if (md->end_offset_top > offset_top)
offset_top = md->end_offset_top; /* Captures may have happened */
condition = TRUE;
- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
+
+ /* Advance ecode past the assertion to the start of the first branch,
+ but adjust it so that the general choosing code below works. */
+
+ ecode += GET(ecode, 1);
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
+ ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
}
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
- assertion; it is therefore treated as NOMATCH. */
+ assertion; it is therefore treated as NOMATCH. Any other return is an
+ error. */
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
{
RRETURN(rrc); /* Need braces because of following else */
}
- else
- {
- condition = FALSE;
- ecode += codelink;
- }
+ break;
}
- /* We are now at the branch that is to be obeyed. As there is only one, can
- use tail recursion to avoid using another stack frame, except when there is
- unlimited repeat of a possibly empty group. In the latter case, a recursive
- call to match() is always required, unless the second alternative doesn't
- exist, in which case we can just plough on. Note that, for compatibility
- with Perl, the | in a conditional group is NOT treated as creating two
- alternatives. If a THEN is encountered in the branch, it propagates out to
- the enclosing alternative (unless nested in a deeper set of alternatives,
- of course). */
-
- if (condition || *ecode == OP_ALT)
+ /* Choose branch according to the condition */
+
+ ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
+
+ /* We are now at the branch that is to be obeyed. As there is only one, we
+ can use tail recursion to avoid using another stack frame, except when
+ there is unlimited repeat of a possibly empty group. In the latter case, a
+ recursive call to match() is always required, unless the second alternative
+ doesn't exist, in which case we can just plough on. Note that, for
+ compatibility with Perl, the | in a conditional group is NOT treated as
+ creating two alternatives. If a THEN is encountered in the branch, it
+ propagates out to the enclosing alternative (unless nested in a deeper set
+ of alternatives, of course). */
+
+ if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
{
if (op != OP_SCOND)
{
- ecode += 1 + LINK_SIZE;
goto TAIL_RECURSE;
}
md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
RRETURN(rrc);
}
@@ -1504,7 +1442,6 @@ for (;;)
else
{
- ecode += 1 + LINK_SIZE;
}
break;
@@ -1513,7 +1450,7 @@ for (;;)
to close any currently open capturing brackets. */
case OP_CLOSE:
- number = GET2(ecode, 1);
+ number = GET2(ecode, 1); /* Must be less than 65536 */
offset = number << 1;
#ifdef PCRE_DEBUG
@@ -1521,8 +1458,8 @@ for (;;)
printf("\n");
#endif
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+ md->capture_last = (md->capture_last & OVFLMASK) | number;
+ if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
{
md->offset_vector[offset] =
md->offset_vector[md->offset_end - number];
@@ -1584,28 +1521,49 @@ for (;;)
}
else condassert = FALSE;
+ /* Loop for each branch */
+
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
+
+ /* A match means that the assertion is true; break out of the loop
+ that matches its alternatives. */
+
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
mstart = md->start_match_ptr; /* In case \K reset it */
break;
}
+
+ /* If not matched, restore the previous mark setting. */
+
md->mark = save_mark;
- /* A COMMIT failure must fail the entire assertion, without trying any
- subsequent branches. */
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
- if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
- /* PCRE does not allow THEN to escape beyond an assertion; it
- is treated as NOMATCH. */
+ /* Anything other than NOMATCH causes the entire assertion to fail,
+ passing back the return code. This includes COMMIT, SKIP, PRUNE and an
+ uncaptured THEN, which means they take their normal effect. This
+ consistent approach does not always have exactly the same effect as in
+ Perl. */
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode, 1);
}
- while (*ecode == OP_ALT);
+ while (*ecode == OP_ALT); /* Continue for next alternative */
+
+ /* If we have tried all the alternative branches, the assertion has
+ failed. If not, we broke out after a match. */
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
@@ -1613,17 +1571,16 @@ for (;;)
if (condassert) RRETURN(MATCH_MATCH);
- /* Continue from after the assertion, updating the offsets high water
- mark, since extracts may have been taken during the assertion. */
+ /* Continue from after a successful assertion, updating the offsets high
+ water mark, since extracts may have been taken during the assertion. */
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
ecode += 1 + LINK_SIZE;
offset_top = md->end_offset_top;
continue;
- /* Negative assertion: all branches must fail to match. Encountering SKIP,
- PRUNE, or COMMIT means we must assume failure without checking subsequent
- branches. */
+ /* Negative assertion: all branches must fail to match for the assertion to
+ succeed. */
case OP_ASSERT_NOT:
case OP_ASSERTBACK_NOT:
@@ -1635,28 +1592,64 @@ for (;;)
}
else condassert = FALSE;
+ /* Loop for each alternative branch. */
+
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
- md->mark = save_mark;
- if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
- if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
+ md->mark = save_mark; /* Always restore the mark setting */
+
+ switch(rrc)
{
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ case MATCH_MATCH: /* A successful match means */
+ case MATCH_ACCEPT: /* the assertion has failed. */
+ RRETURN(MATCH_NOMATCH);
+
+ case MATCH_NOMATCH: /* Carry on with next branch */
break;
+
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
+
+ case MATCH_THEN:
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ {
+ rrc = MATCH_NOMATCH;
+ break;
+ }
+ /* Otherwise fall through. */
+
+ /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
+ assertion to fail to match, without considering any more alternatives.
+ Failing to match means the assertion is true. This is a consistent
+ approach, but does not always have the same effect as in Perl. */
+
+ case MATCH_COMMIT:
+ case MATCH_SKIP:
+ case MATCH_SKIP_ARG:
+ case MATCH_PRUNE:
+ do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ goto NEG_ASSERT_TRUE; /* Break out of alternation loop */
+
+ /* Anything else is an error */
+
+ default:
+ RRETURN(rrc);
}
- /* PCRE does not allow THEN to escape beyond an assertion; it is treated
- as NOMATCH. */
+ /* Continue with next branch */
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode,1);
}
while (*ecode == OP_ALT);
- if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
+ /* All branches in the assertion failed to match. */
- ecode += 1 + LINK_SIZE;
+ NEG_ASSERT_TRUE:
+ if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
+ ecode += 1 + LINK_SIZE; /* Continue with current branch */
continue;
/* Move the subject pointer back. This occurs only at the start of
@@ -1716,7 +1709,9 @@ for (;;)
cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
+ cb.capture_last = md->capture_last & CAPLMASK;
+ /* Internal change requires this for API compatibility. */
+ if (cb.capture_last == 0) cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = md->nomatch_mark;
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
@@ -1762,6 +1757,7 @@ for (;;)
/* Add to "recursing stack" */
new_recursive.group_num = recno;
+ new_recursive.saved_capture_last = md->capture_last;
new_recursive.subject_position = eptr;
new_recursive.prevrec = md->recursive;
md->recursive = &new_recursive;
@@ -1785,8 +1781,9 @@ for (;;)
new_recursive.saved_max * sizeof(int));
/* OK, now we can do the recursion. After processing each alternative,
- restore the offset data. If there were nested recursions, md->recursive
- might be changed, so reset it before looping. */
+ restore the offset data and the last captured value. If there were nested
+ recursions, md->recursive might be changed, so reset it before looping.
+ */
DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
cbegroup = (*callpat >= OP_SBRA);
@@ -1797,6 +1794,7 @@ for (;;)
md, eptrb, RM6);
memcpy(md->offset_vector, new_recursive.offset_save,
new_recursive.saved_max * sizeof(int));
+ md->capture_last = new_recursive.saved_capture_last;
md->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
@@ -1813,11 +1811,16 @@ for (;;)
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
- /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
- is treated as NOMATCH. */
+ /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
+ recursion; they cause a NOMATCH for the entire recursion. These codes
+ are defined in a range that can be tested for. */
+
+ if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+ RRETURN(MATCH_NOMATCH);
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
- rrc != MATCH_COMMIT)
+ /* Any return code other than NOMATCH is an error. */
+
+ if (rrc != MATCH_NOMATCH)
{
DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave)
@@ -1947,8 +1950,8 @@ for (;;)
/* Deal with capturing */
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+ md->capture_last = (md->capture_last & OVFLMASK) | number;
+ if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
{
/* If offset is greater than offset_top, it means that we are
"skipping" a capturing group, and that group's offsets must be marked
@@ -2004,6 +2007,7 @@ for (;;)
if (*ecode == OP_KETRPOS)
{
+ md->start_match_ptr = mstart; /* In case \K reset it */
md->end_match_ptr = eptr;
md->end_offset_top = offset_top;
RRETURN(MATCH_KETRPOS);
@@ -2571,19 +2575,24 @@ for (;;)
RRETURN(MATCH_NOMATCH);
break;
- case PT_SPACE: /* Perl space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
- == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR)
- == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
+ (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
+ break;
+ }
break;
case PT_WORD:
@@ -2604,6 +2613,13 @@ for (;;)
}
break;
+ case PT_UCNC:
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
/* This should never occur */
default:
@@ -2650,15 +2666,7 @@ for (;;)
similar code to character type repeats - written out again for speed.
However, if the referenced string is the empty string, always treat
it as matched, any number of times (otherwise there could be infinite
- loops). */
-
- case OP_REF:
- case OP_REFI:
- caseless = op == OP_REFI;
- offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- ecode += 1 + IMM2_SIZE;
-
- /* If the reference is unset, there are two possibilities:
+ loops). If the reference is unset, there are two possibilities:
(a) In the default, Perl-compatible state, set the length negative;
this ensures that every attempt at a match fails. We can't just fail
@@ -2668,8 +2676,39 @@ for (;;)
so that the back reference matches an empty string.
Otherwise, set the length to the length of what was matched by the
- referenced subpattern. */
+ referenced subpattern.
+
+ The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
+ or to a non-duplicated named group. For a duplicated named group, OP_DNREF
+ and OP_DNREFI are used. In this case we must scan the list of groups to
+ which the name refers, and use the first one that is set. */
+
+ case OP_DNREF:
+ case OP_DNREFI:
+ caseless = op == OP_DNREFI;
+ {
+ int count = GET2(ecode, 1+IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ ecode += 1 + 2*IMM2_SIZE;
+
+ while (count-- > 0)
+ {
+ offset = GET2(slot, 0) << 1;
+ if (offset < offset_top && md->offset_vector[offset] >= 0) break;
+ slot += md->name_entry_size;
+ }
+ if (count < 0)
+ length = (md->jscript_compat)? 0 : -1;
+ else
+ length = md->offset_vector[offset+1] - md->offset_vector[offset];
+ }
+ goto REF_REPEAT;
+ case OP_REF:
+ case OP_REFI:
+ caseless = op == OP_REFI;
+ offset = GET2(ecode, 1) << 1; /* Doubled ref number */
+ ecode += 1 + IMM2_SIZE;
if (offset >= offset_top || md->offset_vector[offset] < 0)
length = (md->jscript_compat)? 0 : -1;
else
@@ -2677,6 +2716,7 @@ for (;;)
/* Set up for repetition, or handle the non-repeated case */
+ REF_REPEAT:
switch (*ecode)
{
case OP_CRSTAR:
@@ -2825,8 +2865,12 @@ for (;;)
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
+ if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
+ else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
@@ -2834,7 +2878,9 @@ for (;;)
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);
+ possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;
@@ -2976,6 +3022,9 @@ for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr += len;
}
+
+ if (possessive) continue; /* No backtracking */
+
for (;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
@@ -3006,6 +3055,9 @@ for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr++;
}
+
+ if (possessive) continue; /* No backtracking */
+
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
@@ -3021,9 +3073,10 @@ for (;;)
/* Control never gets here */
- /* Match an extended character class. This opcode is encountered only
- when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8
- mode, because Unicode properties are supported in non-UTF-8 mode. */
+ /* Match an extended character class. In the 8-bit library, this opcode is
+ encountered only when UTF-8 mode mode is supported. In the 16-bit and
+ 32-bit libraries, codepoints greater than 255 may be encountered even when
+ UTF is not supported. */
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
@@ -3039,8 +3092,12 @@ for (;;)
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
+ if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
+ else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
@@ -3048,7 +3105,9 @@ for (;;)
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);
+ possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;
@@ -3120,6 +3179,9 @@ for (;;)
if (!PRIV(xclass)(c, data, utf)) break;
eptr += len;
}
+
+ if (possessive) continue; /* No backtracking */
+
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
@@ -3190,7 +3252,7 @@ for (;;)
if (fc < 128)
{
- pcre_uchar cc = RAWUCHAR(eptr);
+ pcre_uint32 cc = RAWUCHAR(eptr);
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
@@ -3295,7 +3357,22 @@ for (;;)
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-character matches. */
+ /* Common code for all repeated single-character matches. We first check
+ for the minimum number of characters. If the minimum equals the maximum, we
+ are done. Otherwise, if minimizing, check the rest of the pattern for a
+ match; if there isn't one, advance up to the maximum, one character at a
+ time.
+
+ If maximizing, advance up to the maximum number of matching characters,
+ until eptr is past the end of the maximum run. If possessive, we are
+ then done (no backing up). Otherwise, match at this position; anything
+ other than no match is immediately returned. For nomatch, back up one
+ character, unless we are matching \R and the last thing matched was
+ \r\n, in which case, back up two bytes. When we reach the first optional
+ character position, we can save stack by doing a tail recurse.
+
+ The various UTF/non-UTF and caseful/caseless cases are handled separately,
+ for speed. */
REPEATCHAR:
#ifdef SUPPORT_UTF
@@ -3379,13 +3456,12 @@ for (;;)
}
}
- if (possessive) continue;
-
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
#ifdef SUPPORT_UCP
eptr--;
BACKCHAR(eptr);
@@ -3439,8 +3515,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3455,8 +3530,7 @@ for (;;)
{
for (fi = min;; fi++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
@@ -3476,8 +3550,7 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3487,18 +3560,16 @@ for (;;)
if (fc != cc && foc != cc) break;
eptr++;
}
-
- if (possessive) continue;
-
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
- RRETURN(MATCH_NOMATCH);
+ /* Control never gets here */
}
- /* Control never gets here */
}
/* Caseful comparisons (includes all multi-byte characters) */
@@ -3546,15 +3617,15 @@ for (;;)
if (fc != RAWUCHARTEST(eptr)) break;
eptr++;
}
- if (possessive) continue;
-
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
- RRETURN(MATCH_NOMATCH);
+ /* Control never gets here */
}
}
/* Control never gets here */
@@ -3726,7 +3797,7 @@ for (;;)
}
}
else
-#endif
+#endif /* SUPPORT_UTF */
/* Not UTF mode */
{
for (i = 1; i <= min; i++)
@@ -3764,7 +3835,7 @@ for (;;)
}
}
else
-#endif
+#endif /*SUPPORT_UTF */
/* Not UTF mode */
{
for (fi = min;; fi++)
@@ -3806,17 +3877,18 @@ for (;;)
if (fc == d || (unsigned int)foc == d) break;
eptr += len;
}
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
}
}
else
-#endif
+#endif /* SUPPORT_UTF */
/* Not UTF mode */
{
for (i = min; i < max; i++)
@@ -3829,18 +3901,17 @@ for (;;)
if (fc == *eptr || foc == *eptr) break;
eptr++;
}
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
}
}
-
- RRETURN(MATCH_NOMATCH);
+ /* Control never gets here */
}
- /* Control never gets here */
}
/* Caseful comparisons */
@@ -3941,12 +4012,13 @@ for (;;)
if (fc == d) break;
eptr += len;
}
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
}
}
@@ -3964,16 +4036,16 @@ for (;;)
if (fc == *eptr) break;
eptr++;
}
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
}
}
-
- RRETURN(MATCH_NOMATCH);
+ /* Control never gets here */
}
}
/* Control never gets here */
@@ -4155,7 +4227,12 @@ for (;;)
}
break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+
case PT_SPACE: /* Perl space */
+ case PT_PXSPACE: /* POSIX space */
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
@@ -4164,26 +4241,18 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case PT_PXSPACE: /* POSIX space */
- for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
+ switch(c)
{
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ break;
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
}
break;
@@ -4225,6 +4294,22 @@ for (;;)
}
break;
+ case PT_UCNC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
/* This should not occur */
default:
@@ -4430,8 +4515,7 @@ for (;;)
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4448,8 +4532,7 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4466,8 +4549,7 @@ for (;;)
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4484,8 +4566,7 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4502,8 +4583,7 @@ for (;;)
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4892,25 +4972,11 @@ for (;;)
}
/* Control never gets here */
- case PT_SPACE: /* Perl space */
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
for (fi = min;; fi++)
{
@@ -4923,10 +4989,18 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ break;
+ }
}
/* Control never gets here */
@@ -4976,6 +5050,25 @@ for (;;)
}
/* Control never gets here */
+ case PT_UCNC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
+
/* This should never occur */
default:
RRETURN(PCRE_ERROR_INTERNAL);
@@ -5391,7 +5484,12 @@ for (;;)
}
break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+
case PT_SPACE: /* Perl space */
+ case PT_PXSPACE: /* POSIX space */
for (i = min; i < max; i++)
{
int len = 1;
@@ -5401,30 +5499,21 @@ for (;;)
break;
}
GETCHARLENTEST(c, eptr, len);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
break;
- eptr+= len;
- }
- break;
- case PT_PXSPACE: /* POSIX space */
- for (i = min; i < max; i++)
- {
- int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ default:
+ if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ goto ENDLOOP99; /* Break the loop */
break;
}
- GETCHARLENTEST(c, eptr, len);
- if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
- c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
- == prop_fail_result)
- break;
eptr+= len;
}
+ ENDLOOP99:
break;
case PT_WORD:
@@ -5470,23 +5559,42 @@ for (;;)
GOT_MAX:
break;
+ case PT_UCNC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLENTEST(c, eptr, len);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ break;
+ eptr += len;
+ }
+ break;
+
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
/* eptr is now past the end of the maximum run */
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
if (utf) BACKCHAR(eptr);
}
}
- /* Match extended Unicode sequences. We will get here only if the
+ /* Match extended Unicode grapheme clusters. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */
else if (ctype == OP_EXTUNI)
@@ -5518,22 +5626,42 @@ for (;;)
/* eptr is now past the end of the maximum run */
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ int lgb, rgb;
+ PCRE_PUCHAR fptr;
+
+ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
- for (;;) /* Move back over one extended */
+
+ /* Backtracking over an extended grapheme cluster involves inspecting
+ the previous two characters (if present) to see if a break is
+ permitted between them. */
+
+ eptr--;
+ if (!utf) c = *eptr; else
+ {
+ BACKCHAR(eptr);
+ GETCHAR(c, eptr);
+ }
+ rgb = UCD_GRAPHBREAK(c);
+
+ for (;;)
{
- if (!utf) c = *eptr; else
+ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ fptr = eptr - 1;
+ if (!utf) c = *fptr; else
{
- BACKCHAR(eptr);
- GETCHAR(c, eptr);
+ BACKCHAR(fptr);
+ GETCHAR(c, fptr);
}
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr--;
+ lgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ eptr = fptr;
+ rgb = lgb;
}
}
}
@@ -5799,18 +5927,13 @@ for (;;)
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run. If possessive, we are
- done (no backing up). Otherwise, match at this position; anything other
- than no match is immediately returned. For nomatch, back up one
- character, unless we are matching \R and the last thing matched was
- \r\n, in which case, back up two bytes. */
-
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
@@ -6048,15 +6171,10 @@ for (;;)
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run. If possessive, we are
- done (no backing up). Otherwise, match at this position; anything other
- than no match is immediately returned. For nomatch, back up one
- character (byte), unless we are matching \R and the last thing matched
- was \r\n, in which case, back up two bytes. */
-
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -6065,11 +6183,8 @@ for (;;)
}
}
- /* Get here if we can't make it match with any permitted repetitions */
-
- RRETURN(MATCH_NOMATCH);
+ /* Control never gets here */
}
- /* Control never gets here */
/* There's been some horrible disaster. Arrival here can only mean there is
something seriously wrong in the code above or the OP_xxx definitions. */
@@ -6103,10 +6218,10 @@ switch (frame->Xwhere)
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
LBL(65) LBL(66)
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- LBL(21)
+ LBL(20) LBL(21)
#endif
#ifdef SUPPORT_UTF
- LBL(16) LBL(18) LBL(20)
+ LBL(16) LBL(18)
LBL(22) LBL(23) LBL(28) LBL(30)
LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP
@@ -6264,6 +6379,7 @@ const pcre_uint8 *start_bits = NULL;
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
PCRE_PUCHAR end_subject;
PCRE_PUCHAR start_partial = NULL;
+PCRE_PUCHAR match_partial = NULL;
PCRE_PUCHAR req_char_ptr = start_match - 1;
const pcre_study_data *study;
@@ -6393,6 +6509,8 @@ md->callout_data = NULL;
tables = re->tables;
+/* The two limit values override the defaults, whatever their value. */
+
if (extra_data != NULL)
{
register unsigned int flags = extra_data->flags;
@@ -6407,6 +6525,15 @@ if (extra_data != NULL)
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
}
+/* Limits in the regex override only if they are smaller. */
+
+if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
+ md->match_limit = re->limit_match;
+
+if ((re->flags & PCRE_RLSET) != 0 &&
+ re->limit_recursion < md->match_limit_recursion)
+ md->match_limit_recursion = re->limit_recursion;
+
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */
@@ -6432,7 +6559,7 @@ end_subject = md->end_subject;
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-md->ignore_skip_arg = FALSE;
+md->ignore_skip_arg = 0;
/* Some options are unpacked into BOOL variables in the hope that testing
them will be faster than individual option bits. */
@@ -6542,11 +6669,9 @@ if (re->top_backref > 0 && re->top_backref >= ocount/3)
DPRINTF(("Got memory to hold back references\n"));
}
else md->offset_vector = offsets;
-
md->offset_end = ocount;
md->offset_max = (2*ocount)/3;
-md->offset_overflow = FALSE;
-md->capture_last = -1;
+md->capture_last = 0;
/* Reset the working variable associated with each extraction. These should
never be used unless previously set, but they get saved and restored, and so we
@@ -6816,8 +6941,13 @@ for(;;)
md->match_call_count = 0;
md->match_function_type = 0;
md->end_offset_top = 0;
+ md->skip_arg_count = 0;
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
- if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
+ if (md->hitend && start_partial == NULL)
+ {
+ start_partial = md->start_used_ptr;
+ match_partial = start_match;
+ }
switch(rc)
{
@@ -6830,14 +6960,14 @@ for(;;)
case MATCH_SKIP_ARG:
new_start_match = start_match;
- md->ignore_skip_arg = TRUE;
+ md->ignore_skip_arg = md->skip_arg_count;
break;
- /* SKIP passes back the next starting point explicitly, but if it is the
- same as the match we have just done, treat it as NOMATCH. */
+ /* SKIP passes back the next starting point explicitly, but if it is no
+ greater than the match we have just done, treat it as NOMATCH. */
case MATCH_SKIP:
- if (md->start_match_ptr != start_match)
+ if (md->start_match_ptr > start_match)
{
new_start_match = md->start_match_ptr;
break;
@@ -6845,12 +6975,12 @@ for(;;)
/* Fall through */
/* NOMATCH and PRUNE advance by one character. THEN at this level acts
- exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
+ exactly like PRUNE. Unset ignore SKIP-with-argument. */
case MATCH_NOMATCH:
case MATCH_PRUNE:
case MATCH_THEN:
- md->ignore_skip_arg = FALSE;
+ md->ignore_skip_arg = 0;
new_start_match = start_match + 1;
#ifdef SUPPORT_UTF
if (utf)
@@ -6943,7 +7073,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
(arg_offset_max - 2) * sizeof(int));
DPRINTF(("Copied offsets from temporary memory\n"));
}
- if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
+ if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
DPRINTF(("Freeing temporary memory\n"));
(PUBL(free))(md->offset_vector);
}
@@ -6951,7 +7081,8 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
/* Set the return code to the number of captured strings, or 0 if there were
too many to fit into the vector. */
- rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
+ rc = ((md->capture_last & OVFLBIT) != 0 &&
+ md->end_offset_top >= arg_offset_max)?
0 : md->end_offset_top/2;
/* If there is space in the offset vector, set any unused pairs at the end of
@@ -7016,7 +7147,7 @@ if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
/* Handle partial matches - disable any mark data */
-if (start_partial != NULL)
+if (match_partial != NULL)
{
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
md->mark = NULL;
@@ -7024,6 +7155,8 @@ if (start_partial != NULL)
{
offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+ if (offsetcount > 2)
+ offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
}
rc = PCRE_ERROR_PARTIAL;
}
diff --git a/src/3rdparty/pcre/pcre_fullinfo.c b/src/3rdparty/pcre/pcre_fullinfo.c
index e64da06eb4..dfac243573 100644
--- a/src/3rdparty/pcre/pcre_fullinfo.c
+++ b/src/3rdparty/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -222,6 +222,20 @@ switch (what)
*((int *)where) = re->max_lookbehind;
break;
+ case PCRE_INFO_MATCHLIMIT:
+ if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
+ *((pcre_uint32 *)where) = re->limit_match;
+ break;
+
+ case PCRE_INFO_RECURSIONLIMIT:
+ if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
+ *((pcre_uint32 *)where) = re->limit_recursion;
+ break;
+
+ case PCRE_INFO_MATCH_EMPTY:
+ *((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
+ break;
+
default: return PCRE_ERROR_BADOPTION;
}
diff --git a/src/3rdparty/pcre/pcre_internal.h b/src/3rdparty/pcre/pcre_internal.h
index f3cb001fea..0b9798c554 100644
--- a/src/3rdparty/pcre/pcre_internal.h
+++ b/src/3rdparty/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -194,23 +194,31 @@ preprocessor time in standard C environments. */
typedef unsigned char pcre_uint8;
#if USHRT_MAX == 65535
- typedef unsigned short pcre_uint16;
- typedef short pcre_int16;
+typedef unsigned short pcre_uint16;
+typedef short pcre_int16;
+#define PCRE_UINT16_MAX USHRT_MAX
+#define PCRE_INT16_MAX SHRT_MAX
#elif UINT_MAX == 65535
- typedef unsigned int pcre_uint16;
- typedef int pcre_int16;
+typedef unsigned int pcre_uint16;
+typedef int pcre_int16;
+#define PCRE_UINT16_MAX UINT_MAX
+#define PCRE_INT16_MAX INT_MAX
#else
-# error Cannot determine a type for 16-bit unsigned integers
+#error Cannot determine a type for 16-bit integers
#endif
-#if UINT_MAX == 4294967295
- typedef unsigned int pcre_uint32;
- typedef int pcre_int32;
-#elif ULONG_MAX == 4294967295
- typedef unsigned long int pcre_uint32;
- typedef long int pcre_int32;
+#if UINT_MAX == 4294967295U
+typedef unsigned int pcre_uint32;
+typedef int pcre_int32;
+#define PCRE_UINT32_MAX UINT_MAX
+#define PCRE_INT32_MAX INT_MAX
+#elif ULONG_MAX == 4294967295UL
+typedef unsigned long int pcre_uint32;
+typedef long int pcre_int32;
+#define PCRE_UINT32_MAX ULONG_MAX
+#define PCRE_INT32_MAX LONG_MAX
#else
-# error Cannot determine a type for 32-bit unsigned integers
+#error Cannot determine a type for 32-bit integers
#endif
/* When checking for integer overflow in pcre_compile(), we need to handle
@@ -1121,23 +1129,27 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
/* Private flags containing information about the compiled regex. They used to
-live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards
+live at the top end of the options word, but that got almost full, so they were
+moved to a 16-bit flags word - which got almost full, so now they are in a
+32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
+restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
-#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
-#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
-#define PCRE_FIRSTSET 0x0010 /* first_char is set */
-#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
-#define PCRE_REQCHSET 0x0040 /* req_byte is set */
-#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
-#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
-#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
-#define PCRE_JCHANGED 0x0400 /* j option used in regex */
-#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
+#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
+#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
+#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
+#define PCRE_FIRSTSET 0x00000010 /* first_char is set */
+#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */
+#define PCRE_REQCHSET 0x00000040 /* req_byte is set */
+#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */
+#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */
+#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */
+#define PCRE_JCHANGED 0x00000400 /* j option used in regex */
+#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
+#define PCRE_MLSET 0x00002000 /* match limit set by regex */
+#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
+#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
@@ -1162,9 +1174,10 @@ time, run time, or study time, respectively. */
#define PUBLIC_COMPILE_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
- PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
+ PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
+ PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
- PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
+ PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
@@ -1520,20 +1533,25 @@ a positive value. */
#define STRING_xdigit "xdigit"
#define STRING_DEFINE "DEFINE"
-
-#define STRING_CR_RIGHTPAR "CR)"
-#define STRING_LF_RIGHTPAR "LF)"
-#define STRING_CRLF_RIGHTPAR "CRLF)"
-#define STRING_ANY_RIGHTPAR "ANY)"
-#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
-#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
-#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR "UTF8)"
-#define STRING_UTF16_RIGHTPAR "UTF16)"
-#define STRING_UTF32_RIGHTPAR "UTF32)"
-#define STRING_UTF_RIGHTPAR "UTF)"
-#define STRING_UCP_RIGHTPAR "UCP)"
-#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
+#define STRING_WEIRD_STARTWORD "[:<:]]"
+#define STRING_WEIRD_ENDWORD "[:>:]]"
+
+#define STRING_CR_RIGHTPAR "CR)"
+#define STRING_LF_RIGHTPAR "LF)"
+#define STRING_CRLF_RIGHTPAR "CRLF)"
+#define STRING_ANY_RIGHTPAR "ANY)"
+#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
+#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
+#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
+#define STRING_UTF8_RIGHTPAR "UTF8)"
+#define STRING_UTF16_RIGHTPAR "UTF16)"
+#define STRING_UTF32_RIGHTPAR "UTF32)"
+#define STRING_UTF_RIGHTPAR "UTF)"
+#define STRING_UCP_RIGHTPAR "UCP)"
+#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
+#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
+#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
+#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@@ -1781,20 +1799,25 @@ only. */
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
-
-#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
-#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
-#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
-#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
-#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
-#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
-#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
-#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
-#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+
+#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
+#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
+#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
+#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
+#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
+#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
+#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
+#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
+#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@@ -1835,6 +1858,18 @@ only. */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
+#define PT_UCNC 10 /* Universal Character nameable character */
+#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
+
+/* The following special properties are used only in XCLASS items, when POSIX
+classes are specified and PCRE_UCP is set - in other words, for Unicode
+handling of these classes. They are not available via the \p or \P escapes like
+those in the above list, and so they do not take part in the autopossessifying
+table. */
+
+#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
+#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
+#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
@@ -1849,9 +1884,9 @@ contain characters with values greater than 255. */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* These are escaped items that aren't just an encoding of a particular data
-value such as \n. They must have non-zero values, as check_escape() returns
-0 for a data character. Also, they must appear in the same order as in the opcode
-definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
+value such as \n. They must have non-zero values, as check_escape() returns 0
+for a data character. Also, they must appear in the same order as in the
+opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
non-DOTALL mode, "." behaves like \N.
@@ -1874,12 +1909,31 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
ESC_E, ESC_Q, ESC_g, ESC_k,
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
-/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
-OP_EOD must correspond in order to the list of escapes immediately above.
-*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There are also tables called
-"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+/********************** Opcode definitions ******************/
+
+/****** NOTE NOTE NOTE ******
+
+Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
+order to the list of escapes immediately above. Furthermore, values up to
+OP_DOLLM must not be changed without adjusting the table called autoposstab in
+pcre_compile.c
+
+Whenever this list is updated, the two macro definitions that follow must be
+updated to match. The possessification table called "opcode_possessify" in
+pcre_compile.c must also be updated, and also the tables called "coptable"
+and "poptable" in pcre_dfa_exec.c.
+
+****** NOTE NOTE NOTE ******/
+
+
+/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
+are used in a table for deciding whether a repeated character type can be
+auto-possessified. */
+
+#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
+#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
+#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
enum {
OP_END, /* 0 End of pattern */
@@ -1912,10 +1966,15 @@ enum {
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
OP_EOD, /* 24 End of data (\z) */
- OP_CIRC, /* 25 Start of line - not multiline */
- OP_CIRCM, /* 26 Start of line - multiline */
- OP_DOLL, /* 27 End of line - not multiline */
- OP_DOLLM, /* 28 End of line - multiline */
+ /* Line end assertions */
+
+ OP_DOLL, /* 25 End of line - not multiline */
+ OP_DOLLM, /* 26 End of line - multiline */
+ OP_CIRC, /* 27 Start of line - not multiline */
+ OP_CIRCM, /* 28 Start of line - multiline */
+
+ /* Single characters; caseful must precede the caseless ones */
+
OP_CHAR, /* 29 Match one character, casefully */
OP_CHARI, /* 30 Match one character, caselessly */
OP_NOT, /* 31 Match one character, not the given one, casefully */
@@ -1924,7 +1983,7 @@ enum {
/* The following sets of 13 opcodes must always be kept in step because
the offset from the first one is used to generate the others. */
- /**** Single characters, caseful, must precede the caseless ones ****/
+ /* Repeated characters; caseful must precede the caseless ones */
OP_STAR, /* 33 The maximizing and minimizing versions of */
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
@@ -1942,7 +2001,7 @@ enum {
OP_POSQUERY, /* 44 Posesssified query, caseful */
OP_POSUPTO, /* 45 Possessified upto, caseful */
- /**** Single characters, caseless, must follow the caseful ones */
+ /* Repeated characters; caseless must follow the caseful ones */
OP_STARI, /* 46 */
OP_MINSTARI, /* 47 */
@@ -1960,8 +2019,8 @@ enum {
OP_POSQUERYI, /* 57 Posesssified query, caseless */
OP_POSUPTOI, /* 58 Possessified upto, caseless */
- /**** The negated ones must follow the non-negated ones, and match them ****/
- /**** Negated single character, caseful; must precede the caseless ones ****/
+ /* The negated ones must follow the non-negated ones, and match them */
+ /* Negated repeated character, caseful; must precede the caseless ones */
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
@@ -1979,7 +2038,7 @@ enum {
OP_NOTPOSQUERY, /* 70 */
OP_NOTPOSUPTO, /* 71 */
- /**** Negated single character, caseless; must follow the caseful ones ****/
+ /* Negated repeated character, caseless; must follow the caseful ones */
OP_NOTSTARI, /* 72 */
OP_NOTMINSTARI, /* 73 */
@@ -1997,7 +2056,7 @@ enum {
OP_NOTPOSQUERYI, /* 83 */
OP_NOTPOSUPTOI, /* 84 */
- /**** Character types ****/
+ /* Character types */
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
@@ -2028,89 +2087,96 @@ enum {
OP_CRRANGE, /* 104 These are different to the three sets above. */
OP_CRMINRANGE, /* 105 */
+ OP_CRPOSSTAR, /* 106 Possessified versions */
+ OP_CRPOSPLUS, /* 107 */
+ OP_CRPOSQUERY, /* 108 */
+ OP_CRPOSRANGE, /* 109 */
+
/* End of quantifier opcodes */
- OP_CLASS, /* 106 Match a character class, chars < 256 only */
- OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
+ OP_CLASS, /* 110 Match a character class, chars < 256 only */
+ OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
class - the difference is relevant only when a
character > 255 is encountered. */
- OP_XCLASS, /* 108 Extended class for handling > 255 chars within the
+ OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
class. This does both positive and negative. */
- OP_REF, /* 109 Match a back reference, casefully */
- OP_REFI, /* 110 Match a back reference, caselessly */
- OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
- OP_CALLOUT, /* 112 Call out to external function if provided */
-
- OP_ALT, /* 113 Start of alternation */
- OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
- OP_KETRMAX, /* 115 These two must remain together and in this */
- OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */
- OP_KETRPOS, /* 117 Possessive unlimited repeat. */
+ OP_REF, /* 113 Match a back reference, casefully */
+ OP_REFI, /* 114 Match a back reference, caselessly */
+ OP_DNREF, /* 115 Match a duplicate name backref, casefully */
+ OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
+ OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
+ OP_CALLOUT, /* 118 Call out to external function if provided */
+
+ OP_ALT, /* 119 Start of alternation */
+ OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
+ OP_KETRMAX, /* 121 These two must remain together and in this */
+ OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
+ OP_KETRPOS, /* 123 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
- OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
- OP_ASSERT, /* 119 Positive lookahead */
- OP_ASSERT_NOT, /* 120 Negative lookahead */
- OP_ASSERTBACK, /* 121 Positive lookbehind */
- OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
+ OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
+ OP_ASSERT, /* 125 Positive lookahead */
+ OP_ASSERT_NOT, /* 126 Negative lookahead */
+ OP_ASSERTBACK, /* 127 Positive lookbehind */
+ OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
- OP_ONCE, /* 123 Atomic group, contains captures */
- OP_ONCE_NC, /* 124 Atomic group containing no captures */
- OP_BRA, /* 125 Start of non-capturing bracket */
- OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 127 Start of capturing bracket */
- OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 129 Conditional group */
+ OP_ONCE, /* 129 Atomic group, contains captures */
+ OP_ONCE_NC, /* 130 Atomic group containing no captures */
+ OP_BRA, /* 131 Start of non-capturing bracket */
+ OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 133 Start of capturing bracket */
+ OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 132 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 134 Conditional group, check empty */
+ OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 138 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 135 Used to hold a capture number as condition */
- OP_NCREF, /* 136 Same, but generated by a name reference*/
- OP_RREF, /* 137 Used to hold a recursion number as condition */
- OP_NRREF, /* 138 Same, but generated by a name reference*/
- OP_DEF, /* 139 The DEFINE condition */
+ OP_CREF, /* 141 Used to hold a capture number as condition */
+ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
+ OP_RREF, /* 143 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
+ OP_DEF, /* 145 The DEFINE condition */
- OP_BRAZERO, /* 140 These two must remain together and in this */
- OP_BRAMINZERO, /* 141 order. */
- OP_BRAPOSZERO, /* 142 */
+ OP_BRAZERO, /* 146 These two must remain together and in this */
+ OP_BRAMINZERO, /* 147 order. */
+ OP_BRAPOSZERO, /* 148 */
/* These are backtracking control verbs */
- OP_MARK, /* 143 always has an argument */
- OP_PRUNE, /* 144 */
- OP_PRUNE_ARG, /* 145 same, but with argument */
- OP_SKIP, /* 146 */
- OP_SKIP_ARG, /* 147 same, but with argument */
- OP_THEN, /* 148 */
- OP_THEN_ARG, /* 149 same, but with argument */
- OP_COMMIT, /* 150 */
+ OP_MARK, /* 149 always has an argument */
+ OP_PRUNE, /* 150 */
+ OP_PRUNE_ARG, /* 151 same, but with argument */
+ OP_SKIP, /* 152 */
+ OP_SKIP_ARG, /* 153 same, but with argument */
+ OP_THEN, /* 154 */
+ OP_THEN_ARG, /* 155 same, but with argument */
+ OP_COMMIT, /* 156 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 151 */
- OP_ACCEPT, /* 152 */
- OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
- OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 157 */
+ OP_ACCEPT, /* 158 */
+ OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
+ OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 155 */
+ OP_SKIPZERO, /* 161 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -2121,7 +2187,8 @@ enum {
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
definitions that follow must also be updated to match. There are also tables
-called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
+pcre_dfa_exec.c that must be updated. */
/* This macro defines textual names for all the opcodes. These are used only
@@ -2134,7 +2201,7 @@ some cases doesn't actually use these names at all). */
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
"extuni", "\\Z", "\\z", \
- "^", "^", "$", "$", "char", "chari", "not", "noti", \
+ "$", "$", "^", "^", "char", "chari", "not", "noti", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
"*+","++", "?+", "{", \
@@ -2150,7 +2217,8 @@ some cases doesn't actually use these names at all). */
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
"*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", "{", "{", \
- "class", "nclass", "xclass", "Ref", "Refi", \
+ "*+","++", "?+", "{", \
+ "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
@@ -2159,7 +2227,7 @@ some cases doesn't actually use these names at all). */
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
"SCond", \
- "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
+ "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
@@ -2184,7 +2252,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
3, 3, /* \P, \p */ \
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
1, /* \X */ \
- 1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \
+ 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
2, /* Char - the minimum length */ \
2, /* Chari - the minimum length */ \
2, /* not */ \
@@ -2215,11 +2283,14 @@ in UTF-8 mode. The code that uses this table must know about such things. */
/* Character class & ref repeats */ \
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
+ 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
0, /* XCLASS - variable length */ \
1+IMM2_SIZE, /* REF */ \
1+IMM2_SIZE, /* REFI */ \
+ 1+2*IMM2_SIZE, /* DNREF */ \
+ 1+2*IMM2_SIZE, /* DNREFI */ \
1+LINK_SIZE, /* RECURSE */ \
2+2*LINK_SIZE, /* CALLOUT */ \
1+LINK_SIZE, /* Alt */ \
@@ -2244,8 +2315,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
1+LINK_SIZE, /* SCOND */ \
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \
+ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
+ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
1, /* DEF */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
@@ -2254,8 +2325,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
-/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
-condition. */
+/* A magic value for OP_RREF to indicate the "any recursion" condition. */
#define RREF_ANY 0xffff
@@ -2270,9 +2340,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
+ ERR80, ERR81, ERR82, ERR83, ERR84, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
+
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
@@ -2280,48 +2352,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
code vector run on as long as necessary after the end. We store an explicit
offset to the name table so that if a regex is compiled on one host, saved, and
then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL. For future-proofing, a few dummy fields were
-originally included - even though you can never get this planning right - but
-there is only one left now.
-
-NOTE NOTE NOTE:
-Because people can now save and re-use compiled patterns, any additions to this
-structure should be made at the end, and something earlier (e.g. a new
-flag in the options or one of the dummy fields) should indicate that the new
-fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE
+be well.
+
+The size of the structure must be a multiple of 8 bytes. For the case of
+compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
+that there are an even number of pointers which therefore are a multiple of 8
+bytes.
+
+It is necessary to fork the struct for the 32 bit library, since it needs to
+use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
+typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
+variants.
+
+*** WARNING ***
+When new fields are added to these structures, remember to adjust the code in
+pcre_byte_order.c that is concerned with swapping the byte order of the fields
+when a compiled regex is reloaded on a host with different endianness.
+*** WARNING ***
+There is also similar byte-flipping code in pcretest.c, which is used for
+testing the byte-flipping features. It must also be kept in step.
+*** WARNING ***
*/
-#if defined COMPILE_PCRE8
-#define REAL_PCRE real_pcre
-#elif defined COMPILE_PCRE16
-#define REAL_PCRE real_pcre16
-#elif defined COMPILE_PCRE32
-#define REAL_PCRE real_pcre32
-#endif
-
-/* It is necessary to fork the struct for 32 bit, since it needs to use
- * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
- * typedef since pcretest needs access to the struct of the 8-, 16-
- * and 32-bit variants. */
-
typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint16 first_char; /* Starting character */
+ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint16 first_char; /* Starting character */
- pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- const pcre_uint8 *nullpad; /* NULL padding */
+ void *nullpad; /* NULL padding */
} real_pcre8_or_16;
typedef struct real_pcre8_or_16 real_pcre;
@@ -2331,22 +2404,31 @@ typedef struct real_pcre32 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint32 first_char; /* Starting character */
+ pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint32 first_char; /* Starting character */
- pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
- pcre_uint16 dummy1; /* for later expansion */
- pcre_uint16 dummy2; /* for later expansion */
+ pcre_uint16 dummy; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- void *nullpad; /* for later expansion */
+ void *nullpad; /* NULL padding */
} real_pcre32;
+#if defined COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#elif defined COMPILE_PCRE16
+#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
+#endif
+
/* Assert that the size of REAL_PCRE is divisible by 8 */
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
@@ -2380,6 +2462,15 @@ typedef struct open_capitem {
pcre_uint16 flag; /* Set TRUE if recursive back ref */
} open_capitem;
+/* Structure for building a list of named groups during the first pass of
+compiling. */
+
+typedef struct named_group {
+ const pcre_uchar *name; /* Points to the name in the pattern */
+ int length; /* Length of the name */
+ pcre_uint32 number; /* Group number */
+} named_group;
+
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */
@@ -2392,24 +2483,29 @@ typedef struct compile_data {
const pcre_uchar *start_code; /* The start of the compiled code */
const pcre_uchar *start_pattern; /* The start of the pattern */
const pcre_uchar *end_pattern; /* The end of the pattern */
- open_capitem *open_caps; /* Chain of open capture items */
pcre_uchar *hwm; /* High watermark of workspace */
+ open_capitem *open_caps; /* Chain of open capture items */
+ named_group *named_groups; /* Points to vector in pre-compile */
pcre_uchar *name_table; /* The name/number table */
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
+ int named_group_list_size; /* Number of entries in the list */
int workspace_size; /* Size of workspace */
- unsigned int bracount; /* Count of capturing parens as we compile */
+ unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
+ unsigned int namedrefcount; /* Number of backreferences by name */
+ int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
- int external_options; /* External (initial) options */
- int external_flags; /* External flag bits to be set */
+ pcre_uint32 external_options; /* External (initial) options */
+ pcre_uint32 external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
+ BOOL dupnames; /* Duplicate names exist */
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
@@ -2431,6 +2527,7 @@ typedef struct recursion_info {
unsigned int group_num; /* Number of group that was called */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
+ int saved_capture_last; /* Last capture number */
PCRE_PUCHAR subject_position; /* Position at start of recursion */
} recursion_info;
@@ -2467,12 +2564,13 @@ typedef struct match_data {
int nllen; /* Newline string length */
int name_count; /* Number of names in name table */
int name_entry_size; /* Size of entry in names table */
+ unsigned int skip_arg_count; /* For counting SKIP_ARGs */
+ unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */
pcre_uchar *name_table; /* Table of names */
pcre_uchar nl[4]; /* Newline string when fixed */
const pcre_uint8 *lcc; /* Points to lower casing table */
const pcre_uint8 *fcc; /* Points to case-flipping table */
const pcre_uint8 *ctypes; /* Points to table of type maps */
- BOOL offset_overflow; /* Set if too many extractions */
BOOL notbol; /* NOTBOL flag */
BOOL noteol; /* NOTEOL flag */
BOOL utf; /* UTF-8 / UTF-16 flag */
@@ -2484,7 +2582,6 @@ typedef struct match_data {
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
BOOL hasthen; /* Pattern contains (*THEN) */
- BOOL ignore_skip_arg; /* For re-run when SKIP name not found */
const pcre_uchar *start_code; /* For use when recursing */
PCRE_PUCHAR start_subject; /* Start of the subject string */
PCRE_PUCHAR end_subject; /* End of the subject string */
@@ -2493,7 +2590,7 @@ typedef struct match_data {
PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */
int partial; /* PARTIAL options */
int end_offset_top; /* Highwater mark at end of match */
- int capture_last; /* Most recent capture number */
+ pcre_int32 capture_last; /* Most recent capture number + overflow flag */
int start_offset; /* The start offset value */
int match_function_type; /* Set for certain special calls of MATCH() */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
diff --git a/src/3rdparty/pcre/pcre_jit_compile.c b/src/3rdparty/pcre/pcre_jit_compile.c
index 78fe75d57e..a318708b46 100644
--- a/src/3rdparty/pcre/pcre_jit_compile.c
+++ b/src/3rdparty/pcre/pcre_jit_compile.c
@@ -6,10 +6,10 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
The machine code generator part (this module) was written by Zoltan Herczeg
- Copyright (c) 2010-2012
+ Copyright (c) 2010-2013
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -65,6 +65,15 @@ system files. */
#error Unsupported architecture
#endif
+/* Defines for debugging purposes. */
+
+/* 1 - Use unoptimized capturing brackets.
+ 2 - Enable capture_last_ptr (includes option 1). */
+/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
+
+/* 1 - Always have a control head. */
+/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
+
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768
@@ -157,9 +166,11 @@ typedef struct jit_arguments {
int *offsets;
pcre_uchar *uchar_ptr;
pcre_uchar *mark_ptr;
+ void *callout_data;
/* Everything else after. */
- int offsetcount;
- int calllimit;
+ pcre_uint32 limit_match;
+ int real_offset_count;
+ int offset_count;
pcre_uint8 notbol;
pcre_uint8 noteol;
pcre_uint8 notempty;
@@ -171,6 +182,7 @@ typedef struct executable_functions {
PUBL(jit_callback) callback;
void *userdata;
pcre_uint32 top_bracket;
+ pcre_uint32 limit_match;
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
} executable_functions;
@@ -179,21 +191,27 @@ typedef struct jump_list {
struct jump_list *next;
} jump_list;
-enum stub_types { stack_alloc };
-
typedef struct stub_list {
- enum stub_types type;
- int data;
struct sljit_jump *start;
struct sljit_label *quit;
struct stub_list *next;
} stub_list;
+enum frame_types {
+ no_frame = -1,
+ no_stack = -2
+};
+
+enum control_types {
+ type_mark = 0,
+ type_then_trap = 1
+};
+
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
-the aguments for compile_backtrackingpath. Must be the first member
+the arguments for compile_backtrackingpath. Must be the first member
of its descendants. */
typedef struct backtrack_common {
/* Concatenation stack. */
@@ -209,7 +227,7 @@ typedef struct backtrack_common {
typedef struct assert_backtrack {
backtrack_common common;
jump_list *condfailed;
- /* Less than 0 (-1) if a frame is not needed. */
+ /* Less than 0 if a frame is not needed. */
int framesize;
/* Points to our private memory word on the stack. */
int private_data_ptr;
@@ -230,7 +248,7 @@ typedef struct bracket_backtrack {
/* Both for OP_COND, OP_SCOND. */
jump_list *condfailed;
assert_backtrack *assert;
- /* For OP_ONCE. -1 if not needed. */
+ /* For OP_ONCE. Less than 0 if not needed. */
int framesize;
} u;
/* Points to our private memory word on the stack. */
@@ -265,31 +283,52 @@ typedef struct recurse_entry {
/* Collects the calls until the function is not created. */
jump_list *calls;
/* Points to the starting opcode. */
- int start;
+ sljit_sw start;
} recurse_entry;
typedef struct recurse_backtrack {
backtrack_common common;
+ BOOL inlined_pattern;
} recurse_backtrack;
+#define OP_THEN_TRAP OP_TABLE_LENGTH
+
+typedef struct then_trap_backtrack {
+ backtrack_common common;
+ /* If then_trap is not NULL, this structure contains the real
+ then_trap for the backtracking path. */
+ struct then_trap_backtrack *then_trap;
+ /* Points to the starting opcode. */
+ sljit_sw start;
+ /* Exit point for the then opcodes of this alternative. */
+ jump_list *quit;
+ /* Frame size of the current alternative. */
+ int framesize;
+} then_trap_backtrack;
+
#define MAX_RANGE_SIZE 6
typedef struct compiler_common {
+ /* The sljit ceneric compiler. */
struct sljit_compiler *compiler;
+ /* First byte code. */
pcre_uchar *start;
-
/* Maps private data offset to each opcode. */
- int *private_data_ptrs;
+ sljit_si *private_data_ptrs;
/* Tells whether the capturing bracket is optimized. */
pcre_uint8 *optimized_cbracket;
+ /* Tells whether the starting offset is a target of then. */
+ pcre_uint8 *then_offsets;
+ /* Current position where a THEN must jump. */
+ then_trap_backtrack *then_trap;
/* Starting offset of private data for capturing brackets. */
- int cbraptr;
- /* OVector starting point. Must be divisible by 2. */
+ int cbra_ptr;
+ /* Output vector starting point. Must be divisible by 2. */
int ovector_start;
/* Last known position of the requested byte. */
int req_char_ptr;
/* Head of the last recursion. */
- int recursive_head;
+ int recursive_head_ptr;
/* First inspected character for partial matching. */
int start_used_ptr;
/* Starting pointer for partial soft matches. */
@@ -298,36 +337,56 @@ typedef struct compiler_common {
int first_line_end;
/* Points to the marked string. */
int mark_ptr;
+ /* Recursive control verb management chain. */
+ int control_head_ptr;
+ /* Points to the last matched capture block index. */
+ int capture_last_ptr;
+ /* Points to the starting position of the current match. */
+ int start_ptr;
/* Flipped and lower case tables. */
const pcre_uint8 *fcc;
sljit_sw lcc;
/* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
int mode;
+ /* \K is found in the pattern. */
+ BOOL has_set_som;
+ /* (*SKIP:arg) is found in the pattern. */
+ BOOL has_skip_arg;
+ /* (*THEN) is found in the pattern. */
+ BOOL has_then;
+ /* Needs to know the start position anytime. */
+ BOOL needs_start_ptr;
+ /* Currently in recurse or negative assert. */
+ BOOL local_exit;
+ /* Currently in a positive assert. */
+ BOOL positive_assert;
/* Newline control. */
int nltype;
int newline;
int bsr_nltype;
/* Dollar endonly. */
int endonly;
- BOOL has_set_som;
/* Tables. */
sljit_sw ctypes;
int digits[2 + MAX_RANGE_SIZE];
/* Named capturing brackets. */
- sljit_uw name_table;
+ pcre_uchar *name_table;
sljit_sw name_count;
sljit_sw name_entry_size;
/* Labels and jump lists. */
struct sljit_label *partialmatchlabel;
- struct sljit_label *quitlabel;
- struct sljit_label *acceptlabel;
+ struct sljit_label *quit_label;
+ struct sljit_label *forced_quit_label;
+ struct sljit_label *accept_label;
stub_list *stubs;
recurse_entry *entries;
recurse_entry *currententry;
jump_list *partialmatch;
jump_list *quit;
+ jump_list *positive_assert_quit;
+ jump_list *forced_quit;
jump_list *accept;
jump_list *calllimit;
jump_list *stackalloc;
@@ -338,6 +397,7 @@ typedef struct compiler_common {
jump_list *vspace;
jump_list *casefulcmp;
jump_list *caselesscmp;
+ jump_list *reset_match;
BOOL jscript_compat;
#ifdef SUPPORT_UTF
BOOL utf;
@@ -390,12 +450,6 @@ typedef struct compare_context {
#endif
} compare_context;
-enum {
- frame_end = 0,
- frame_setstrbegin = -1,
- frame_setmark = -2
-};
-
/* Undefine sljit macros. */
#undef CMP
@@ -410,7 +464,7 @@ enum {
#define STACK_TOP SLJIT_SCRATCH_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
-#define CALL_COUNT SLJIT_SAVED_EREG2
+#define COUNT_MATCH SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
/* Local space layout. */
@@ -421,14 +475,14 @@ enum {
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
-#define CALL_LIMIT (4 * sizeof(sljit_sw))
+#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_START (common->ovector_start)
-#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
-#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
+#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
+#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
#if defined COMPILE_PCRE8
@@ -459,6 +513,8 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
sljit_set_label((jump), sljit_emit_label(compiler))
+#define SET_LABEL(jump, label) \
+ sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
@@ -479,11 +535,11 @@ return cc;
/* Functions whose might need modification for all new supported opcodes:
next_opcode
- get_private_data_length
+ check_opcode_types
set_private_data_ptrs
get_framesize
init_frame
- get_private_data_length_for_copy
+ get_private_data_copy_length
copy_private_data
compile_matchingpath
compile_backtrackingpath
@@ -507,6 +563,8 @@ switch(*cc)
case OP_WORDCHAR:
case OP_ANY:
case OP_ALLANY:
+ case OP_NOTPROP:
+ case OP_PROP:
case OP_ANYNL:
case OP_NOT_HSPACE:
case OP_HSPACE:
@@ -519,37 +577,66 @@ switch(*cc)
case OP_CIRCM:
case OP_DOLL:
case OP_DOLLM:
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
+ case OP_CRPOSRANGE:
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_REF:
+ case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
+ case OP_RECURSE:
+ case OP_CALLOUT:
+ case OP_ALT:
+ case OP_KET:
+ case OP_KETRMAX:
+ case OP_KETRMIN:
+ case OP_KETRPOS:
+ case OP_REVERSE:
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ONCE:
+ case OP_ONCE_NC:
+ case OP_BRA:
+ case OP_BRAPOS:
+ case OP_CBRA:
+ case OP_CBRAPOS:
+ case OP_COND:
+ case OP_SBRA:
+ case OP_SBRAPOS:
+ case OP_SCBRA:
+ case OP_SCBRAPOS:
+ case OP_SCOND:
+ case OP_CREF:
+ case OP_DNCREF:
+ case OP_RREF:
+ case OP_DNRREF:
case OP_DEF:
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
+ case OP_PRUNE:
+ case OP_SKIP:
+ case OP_THEN:
case OP_COMMIT:
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
+ case OP_CLOSE:
case OP_SKIPZERO:
- return cc + 1;
-
- case OP_ANYBYTE:
-#ifdef SUPPORT_UTF
- if (common->utf) return NULL;
-#endif
- return cc + 1;
+ return cc + PRIV(OP_lengths)[*cc];
case OP_CHAR:
case OP_CHARI:
@@ -561,222 +648,106 @@ switch(*cc)
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
+ case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
+ case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- cc += 2;
-#ifdef SUPPORT_UTF
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
- return cc;
-
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSUPTO:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSUPTOI:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSUPTO:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTEXACTI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
- cc += 2 + IMM2_SIZE;
+ cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
return cc;
- case OP_NOTPROP:
- case OP_PROP:
- return cc + 1 + 2;
-
+ /* Special cases. */
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
case OP_TYPEPOSUPTO:
- case OP_REF:
- case OP_REFI:
- case OP_CREF:
- case OP_NCREF:
- case OP_RREF:
- case OP_NRREF:
- case OP_CLOSE:
- cc += 1 + IMM2_SIZE;
- return cc;
+ return cc + PRIV(OP_lengths)[*cc] - 1;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- return cc + 1 + 2 * IMM2_SIZE;
-
- case OP_CLASS:
- case OP_NCLASS:
- return cc + 1 + 32 / sizeof(pcre_uchar);
+ case OP_ANYBYTE:
+#ifdef SUPPORT_UTF
+ if (common->utf) return NULL;
+#endif
+ return cc + 1;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
return cc + GET(cc, 1);
#endif
- case OP_RECURSE:
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_REVERSE:
- case OP_ONCE:
- case OP_ONCE_NC:
- case OP_BRA:
- case OP_BRAPOS:
- case OP_COND:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCOND:
- case OP_ALT:
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
- case OP_KETRPOS:
- return cc + 1 + LINK_SIZE;
-
- case OP_CBRA:
- case OP_CBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- return cc + 1 + LINK_SIZE + IMM2_SIZE;
-
case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_SKIP_ARG:
+ case OP_THEN_ARG:
return cc + 1 + 2 + cc[1];
default:
+ /* All opcodes are supported now! */
+ SLJIT_ASSERT_STOP();
return NULL;
}
}
-#define CASE_ITERATOR_PRIVATE_DATA_1 \
- case OP_MINSTAR: \
- case OP_MINPLUS: \
- case OP_QUERY: \
- case OP_MINQUERY: \
- case OP_MINSTARI: \
- case OP_MINPLUSI: \
- case OP_QUERYI: \
- case OP_MINQUERYI: \
- case OP_NOTMINSTAR: \
- case OP_NOTMINPLUS: \
- case OP_NOTQUERY: \
- case OP_NOTMINQUERY: \
- case OP_NOTMINSTARI: \
- case OP_NOTMINPLUSI: \
- case OP_NOTQUERYI: \
- case OP_NOTMINQUERYI:
-
-#define CASE_ITERATOR_PRIVATE_DATA_2A \
- case OP_STAR: \
- case OP_PLUS: \
- case OP_STARI: \
- case OP_PLUSI: \
- case OP_NOTSTAR: \
- case OP_NOTPLUS: \
- case OP_NOTSTARI: \
- case OP_NOTPLUSI:
-
-#define CASE_ITERATOR_PRIVATE_DATA_2B \
- case OP_UPTO: \
- case OP_MINUPTO: \
- case OP_UPTOI: \
- case OP_MINUPTOI: \
- case OP_NOTUPTO: \
- case OP_NOTMINUPTO: \
- case OP_NOTUPTOI: \
- case OP_NOTMINUPTOI:
-
-#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
- case OP_TYPEMINSTAR: \
- case OP_TYPEMINPLUS: \
- case OP_TYPEQUERY: \
- case OP_TYPEMINQUERY:
-
-#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
- case OP_TYPESTAR: \
- case OP_TYPEPLUS:
-
-#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
- case OP_TYPEUPTO: \
- case OP_TYPEMINUPTO:
-
-static int get_class_iterator_size(pcre_uchar *cc)
+static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
-switch(*cc)
- {
- case OP_CRSTAR:
- case OP_CRPLUS:
- return 2;
-
- case OP_CRMINSTAR:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- return 1;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
- return 0;
- return 2;
-
- default:
- return 0;
- }
-}
-
-static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
-{
-int private_data_length = 0;
-pcre_uchar *alternative;
-pcre_uchar *name;
-pcre_uchar *end = NULL;
-int space, size, i;
-pcre_uint32 bracketlen;
+int count;
+pcre_uchar *slot;
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
{
- space = 0;
- size = 0;
- bracketlen = 0;
switch(*cc)
{
case OP_SET_SOM:
@@ -790,130 +761,67 @@ while (cc < ccend)
cc += 1 + IMM2_SIZE;
break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_ONCE:
- case OP_ONCE_NC:
- case OP_BRAPOS:
- case OP_SBRA:
- case OP_SBRAPOS:
- private_data_length += sizeof(sljit_sw);
- bracketlen = 1 + LINK_SIZE;
- break;
-
case OP_CBRAPOS:
case OP_SCBRAPOS:
- private_data_length += sizeof(sljit_sw);
common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
- bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+ cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_COND:
case OP_SCOND:
- bracketlen = cc[1 + LINK_SIZE];
- if (bracketlen == OP_CREF)
- {
- bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
- common->optimized_cbracket[bracketlen] = 0;
- }
- else if (bracketlen == OP_NCREF)
- {
- bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
- name = (pcre_uchar *)common->name_table;
- alternative = name;
- for (i = 0; i < common->name_count; i++)
- {
- if (GET2(name, 0) == bracketlen) break;
- name += common->name_entry_size;
- }
- SLJIT_ASSERT(i != common->name_count);
-
- for (i = 0; i < common->name_count; i++)
- {
- if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
- common->optimized_cbracket[GET2(alternative, 0)] = 0;
- alternative += common->name_entry_size;
- }
- }
-
- if (*cc == OP_COND)
- {
- /* Might be a hidden SCOND. */
- alternative = cc + GET(cc, 1);
- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
- private_data_length += sizeof(sljit_sw);
- }
- else
- private_data_length += sizeof(sljit_sw);
- bracketlen = 1 + LINK_SIZE;
- break;
-
- case OP_BRA:
- bracketlen = 1 + LINK_SIZE;
- break;
-
- case OP_CBRA:
- case OP_SCBRA:
- bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
- break;
-
- CASE_ITERATOR_PRIVATE_DATA_1
- space = 1;
- size = -2;
- break;
-
- CASE_ITERATOR_PRIVATE_DATA_2A
- space = 2;
- size = -2;
- break;
-
- CASE_ITERATOR_PRIVATE_DATA_2B
- space = 2;
- size = -(2 + IMM2_SIZE);
- break;
-
- CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- space = 1;
- size = 1;
- break;
-
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
- space = 2;
- size = 1;
- break;
-
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
- space = 2;
- size = 1 + IMM2_SIZE;
+ /* Only AUTO_CALLOUT can insert this opcode. We do
+ not intend to support this case. */
+ if (cc[1 + LINK_SIZE] == OP_CALLOUT)
+ return FALSE;
+ cc += 1 + LINK_SIZE;
break;
- case OP_CLASS:
- case OP_NCLASS:
- size += 1 + 32 / sizeof(pcre_uchar);
- space = get_class_iterator_size(cc + size);
+ case OP_CREF:
+ common->optimized_cbracket[GET2(cc, 1)] = 0;
+ cc += 1 + IMM2_SIZE;
break;
-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- case OP_XCLASS:
- size = GET(cc, 1);
- space = get_class_iterator_size(cc + size);
+ case OP_DNREF:
+ case OP_DNREFI:
+ case OP_DNCREF:
+ count = GET2(cc, 1 + IMM2_SIZE);
+ slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
+ while (count-- > 0)
+ {
+ common->optimized_cbracket[GET2(slot, 0)] = 0;
+ slot += common->name_entry_size;
+ }
+ cc += 1 + 2 * IMM2_SIZE;
break;
-#endif
case OP_RECURSE:
/* Set its value only once. */
- if (common->recursive_head == 0)
+ if (common->recursive_head_ptr == 0)
{
- common->recursive_head = common->ovector_start;
+ common->recursive_head_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
cc += 1 + LINK_SIZE;
break;
+ case OP_CALLOUT:
+ if (common->capture_last_ptr == 0)
+ {
+ common->capture_last_ptr = common->ovector_start;
+ common->ovector_start += sizeof(sljit_sw);
+ }
+ cc += 2 + 2 * LINK_SIZE;
+ break;
+
+ case OP_THEN_ARG:
+ common->has_then = TRUE;
+ common->control_head_ptr = 1;
+ /* Fall through. */
+
+ case OP_PRUNE_ARG:
+ common->needs_start_ptr = TRUE;
+ /* Fall through. */
+
case OP_MARK:
if (common->mark_ptr == 0)
{
@@ -923,48 +831,201 @@ while (cc < ccend)
cc += 1 + 2 + cc[1];
break;
+ case OP_THEN:
+ common->has_then = TRUE;
+ common->control_head_ptr = 1;
+ /* Fall through. */
+
+ case OP_PRUNE:
+ case OP_SKIP:
+ common->needs_start_ptr = TRUE;
+ cc += 1;
+ break;
+
+ case OP_SKIP_ARG:
+ common->control_head_ptr = 1;
+ common->has_skip_arg = TRUE;
+ cc += 1 + 2 + cc[1];
+ break;
+
default:
cc = next_opcode(common, cc);
if (cc == NULL)
- return -1;
+ return FALSE;
break;
}
+ }
+return TRUE;
+}
- if (space > 0 && cc >= end)
- private_data_length += sizeof(sljit_sw) * space;
+static int get_class_iterator_size(pcre_uchar *cc)
+{
+switch(*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRPLUS:
+ return 2;
- if (size != 0)
+ case OP_CRMINSTAR:
+ case OP_CRMINPLUS:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ return 1;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
+ return 0;
+ return 2;
+
+ default:
+ return 0;
+ }
+}
+
+static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
+{
+pcre_uchar *end = bracketend(begin);
+pcre_uchar *next;
+pcre_uchar *next_end;
+pcre_uchar *max_end;
+pcre_uchar type;
+sljit_sw length = end - begin;
+int min, max, i;
+
+/* Detect fixed iterations first. */
+if (end[-(1 + LINK_SIZE)] != OP_KET)
+ return FALSE;
+
+/* Already detected repeat. */
+if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
+ return TRUE;
+
+next = end;
+min = 1;
+while (1)
+ {
+ if (*next != *begin)
+ break;
+ next_end = bracketend(next);
+ if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
+ break;
+ next = next_end;
+ min++;
+ }
+
+if (min == 2)
+ return FALSE;
+
+max = 0;
+max_end = next;
+if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
+ {
+ type = *next;
+ while (1)
{
- if (size < 0)
- {
- cc += -size;
-#ifdef SUPPORT_UTF
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
- }
- else
- cc += size;
+ if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
+ break;
+ next_end = bracketend(next + 2 + LINK_SIZE);
+ if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
+ break;
+ next = next_end;
+ max++;
}
- if (bracketlen != 0)
+ if (next[0] == type && next[1] == *begin && max >= 1)
{
- if (cc >= end)
+ next_end = bracketend(next + 1);
+ if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
{
- end = bracketend(cc);
- if (end[-1 - LINK_SIZE] == OP_KET)
- end = NULL;
+ for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
+ if (*next_end != OP_KET)
+ break;
+
+ if (i == max)
+ {
+ common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
+ common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
+ /* +2 the original and the last. */
+ common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
+ if (min == 1)
+ return TRUE;
+ min--;
+ max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
+ }
}
- cc += bracketlen;
}
}
-return private_data_length;
+
+if (min >= 3)
+ {
+ common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
+ common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
+ common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
+ return TRUE;
+ }
+
+return FALSE;
}
-static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
+#define CASE_ITERATOR_PRIVATE_DATA_1 \
+ case OP_MINSTAR: \
+ case OP_MINPLUS: \
+ case OP_QUERY: \
+ case OP_MINQUERY: \
+ case OP_MINSTARI: \
+ case OP_MINPLUSI: \
+ case OP_QUERYI: \
+ case OP_MINQUERYI: \
+ case OP_NOTMINSTAR: \
+ case OP_NOTMINPLUS: \
+ case OP_NOTQUERY: \
+ case OP_NOTMINQUERY: \
+ case OP_NOTMINSTARI: \
+ case OP_NOTMINPLUSI: \
+ case OP_NOTQUERYI: \
+ case OP_NOTMINQUERYI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2A \
+ case OP_STAR: \
+ case OP_PLUS: \
+ case OP_STARI: \
+ case OP_PLUSI: \
+ case OP_NOTSTAR: \
+ case OP_NOTPLUS: \
+ case OP_NOTSTARI: \
+ case OP_NOTPLUSI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2B \
+ case OP_UPTO: \
+ case OP_MINUPTO: \
+ case OP_UPTOI: \
+ case OP_MINUPTOI: \
+ case OP_NOTUPTO: \
+ case OP_NOTMINUPTO: \
+ case OP_NOTUPTOI: \
+ case OP_NOTMINUPTOI:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
+ case OP_TYPEMINSTAR: \
+ case OP_TYPEMINPLUS: \
+ case OP_TYPEQUERY: \
+ case OP_TYPEMINQUERY:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
+ case OP_TYPESTAR: \
+ case OP_TYPEPLUS:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
+ case OP_TYPEUPTO: \
+ case OP_TYPEMINUPTO:
+
+static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
pcre_uchar *end = NULL;
+int private_data_ptr = *private_data_start;
int space, size, bracketlen;
while (cc < ccend)
@@ -972,8 +1033,30 @@ while (cc < ccend)
space = 0;
size = 0;
bracketlen = 0;
+ if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
+ return;
+
+ if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
+ if (detect_repeat(common, cc))
+ {
+ /* These brackets are converted to repeats, so no global
+ based single character repeat is allowed. */
+ if (cc >= end)
+ end = bracketend(cc);
+ }
+
switch(*cc)
{
+ case OP_KET:
+ if (common->private_data_ptrs[cc + 1 - common->start] != 0)
+ {
+ common->private_data_ptrs[cc - common->start] = private_data_ptr;
+ private_data_ptr += sizeof(sljit_sw);
+ cc += common->private_data_ptrs[cc + 1 - common->start];
+ }
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -1067,6 +1150,8 @@ while (cc < ccend)
break;
}
+ /* Character iterators, which are not inside a repeated bracket,
+ gets a private slot instead of allocating it on the stack. */
if (space > 0 && cc >= end)
{
common->private_data_ptrs[cc - common->start] = private_data_ptr;
@@ -1097,30 +1182,46 @@ while (cc < ccend)
cc += bracketlen;
}
}
+*private_data_start = private_data_ptr;
}
-/* Returns with -1 if no need for frame. */
-static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
+/* Returns with a frame_types (always < 0) if no need for frame. */
+static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
{
-pcre_uchar *ccend = bracketend(cc);
int length = 0;
-BOOL possessive = FALSE;
+int possessive = 0;
+BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
+
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+SLJIT_ASSERT(common->control_head_ptr != 0);
+*needs_control_head = TRUE;
+#else
+*needs_control_head = FALSE;
+#endif
-if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
+if (ccend == NULL)
{
- length = 3;
- possessive = TRUE;
+ ccend = bracketend(cc) - (1 + LINK_SIZE);
+ if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
+ {
+ possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
+ /* This is correct regardless of common->capture_last_ptr. */
+ capture_last_found = TRUE;
+ }
+ cc = next_opcode(common, cc);
}
-cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
switch(*cc)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
+ stack_restore = TRUE;
if (!setsom_found)
{
length += 2;
@@ -1130,16 +1231,22 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
+ stack_restore = TRUE;
if (!setmark_found)
{
length += 2;
setmark_found = TRUE;
}
+ if (common->control_head_ptr != 0)
+ *needs_control_head = TRUE;
cc += 1 + 2 + cc[1];
break;
case OP_RECURSE:
+ stack_restore = TRUE;
if (common->has_set_som && !setsom_found)
{
length += 2;
@@ -1150,6 +1257,11 @@ while (cc < ccend)
length += 2;
setmark_found = TRUE;
}
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ length += 2;
+ capture_last_found = TRUE;
+ }
cc += 1 + LINK_SIZE;
break;
@@ -1157,31 +1269,105 @@ while (cc < ccend)
case OP_CBRAPOS:
case OP_SCBRA:
case OP_SCBRAPOS:
+ stack_restore = TRUE;
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ length += 2;
+ capture_last_found = TRUE;
+ }
length += 3;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
default:
+ stack_restore = TRUE;
+ /* Fall through. */
+
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+ case OP_NOTPROP:
+ case OP_PROP:
+ case OP_ANYNL:
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ case OP_EXTUNI:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+
+ case OP_EXACT:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+ case OP_POSQUERY:
+ case OP_POSUPTO:
+
+ case OP_EXACTI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+ case OP_POSQUERYI:
+ case OP_POSUPTOI:
+
+ case OP_NOTEXACT:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
+
+ case OP_NOTEXACTI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ case OP_NOTPOSQUERYI:
+ case OP_NOTPOSUPTOI:
+
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ case OP_TYPEPOSUPTO:
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_XCLASS:
+
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
break;
}
/* Possessive quantifiers can use a special case. */
-if (SLJIT_UNLIKELY(possessive) && length == 3)
- return -1;
+if (SLJIT_UNLIKELY(possessive == length))
+ return stack_restore ? no_frame : no_stack;
if (length > 0)
return length + 1;
-return -1;
+return stack_restore ? no_frame : no_stack;
}
-static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
+static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
-pcre_uchar *ccend = bracketend(cc);
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
int offset;
/* >= 1 + shortest item size (2) */
@@ -1189,8 +1375,13 @@ SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);
stackpos = STACK(stackpos);
-if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
- cc = next_opcode(common, cc);
+if (ccend == NULL)
+ {
+ ccend = bracketend(cc) - (1 + LINK_SIZE);
+ if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
+ cc = next_opcode(common, cc);
+ }
+
SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
switch(*cc)
@@ -1200,7 +1391,7 @@ while (cc < ccend)
if (!setsom_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
stackpos += (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos += (int)sizeof(sljit_sw);
@@ -1210,11 +1401,13 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
if (!setmark_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
stackpos += (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos += (int)sizeof(sljit_sw);
@@ -1227,7 +1420,7 @@ while (cc < ccend)
if (common->has_set_som && !setsom_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
stackpos += (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos += (int)sizeof(sljit_sw);
@@ -1236,12 +1429,21 @@ while (cc < ccend)
if (common->mark_ptr != 0 && !setmark_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
stackpos += (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos += (int)sizeof(sljit_sw);
setmark_found = TRUE;
}
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+ stackpos += (int)sizeof(sljit_sw);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+ stackpos += (int)sizeof(sljit_sw);
+ capture_last_found = TRUE;
+ }
cc += 1 + LINK_SIZE;
break;
@@ -1249,6 +1451,15 @@ while (cc < ccend)
case OP_CBRAPOS:
case OP_SCBRA:
case OP_SCBRAPOS:
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+ stackpos += (int)sizeof(sljit_sw);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+ stackpos += (int)sizeof(sljit_sw);
+ capture_last_found = TRUE;
+ }
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
stackpos += (int)sizeof(sljit_sw);
@@ -1268,13 +1479,13 @@ while (cc < ccend)
break;
}
-OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
-static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
+static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
{
-int private_data_length = 2;
+int private_data_length = needs_control_head ? 3 : 2;
int size;
pcre_uchar *alternative;
/* Calculate the sum of the private machine words. */
@@ -1283,6 +1494,12 @@ while (cc < ccend)
size = 0;
switch(*cc)
{
+ case OP_KET:
+ if (PRIVATE_DATA(cc) != 0)
+ private_data_length++;
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -1387,7 +1604,7 @@ return private_data_length;
}
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
- BOOL save, int stackptr, int stacktop)
+ BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
int srcw[2];
@@ -1408,7 +1625,7 @@ stacktop = STACK(stacktop - 1);
if (!save)
{
- stackptr += sizeof(sljit_sw);
+ stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
if (stackptr < stacktop)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
@@ -1424,15 +1641,21 @@ if (!save)
/* The tmp1next must be TRUE in either way. */
}
-while (status != end)
+do
{
count = 0;
switch(status)
{
case start:
- SLJIT_ASSERT(save && common->recursive_head != 0);
+ SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
count = 1;
- srcw[0] = common->recursive_head;
+ srcw[0] = common->recursive_head_ptr;
+ if (needs_control_head)
+ {
+ SLJIT_ASSERT(common->control_head_ptr != 0);
+ count = 2;
+ srcw[1] = common->control_head_ptr;
+ }
status = loop;
break;
@@ -1445,6 +1668,15 @@ while (status != end)
switch(*cc)
{
+ case OP_KET:
+ if (PRIVATE_DATA(cc) != 0)
+ {
+ count = 1;
+ srcw[0] = PRIVATE_DATA(cc);
+ }
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -1657,6 +1889,7 @@ while (status != end)
}
}
}
+while (status != end);
if (save)
{
@@ -1690,6 +1923,39 @@ if (save)
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
+static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
+{
+pcre_uchar *end = bracketend(cc);
+BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
+
+/* Assert captures then. */
+if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
+ current_offset = NULL;
+/* Conditional block does not. */
+if (*cc == OP_COND || *cc == OP_SCOND)
+ has_alternatives = FALSE;
+
+cc = next_opcode(common, cc);
+if (has_alternatives)
+ current_offset = common->then_offsets + (cc - common->start);
+
+while (cc < end)
+ {
+ if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
+ cc = set_then_offsets(common, cc, current_offset);
+ else
+ {
+ if (*cc == OP_ALT && has_alternatives)
+ current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
+ if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
+ *current_offset = 1;
+ cc = next_opcode(common, cc);
+ }
+ }
+
+return end;
+}
+
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
@@ -1708,7 +1974,7 @@ while (list)
{
/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
- sljit_set_label(list->jump, label);
+ SET_LABEL(list->jump, label);
list = list->next;
}
}
@@ -1724,15 +1990,13 @@ if (list_item)
}
}
-static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
+static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
if (list_item)
{
- list_item->type = type;
- list_item->data = data;
list_item->start = start;
list_item->quit = LABEL();
list_item->next = common->stubs;
@@ -1748,23 +2012,18 @@ stub_list* list_item = common->stubs;
while (list_item)
{
JUMPHERE(list_item->start);
- switch(list_item->type)
- {
- case stack_alloc:
- add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
- break;
- }
+ add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
JUMPTO(SLJIT_JUMP, list_item->quit);
list_item = list_item->next;
}
common->stubs = NULL;
}
-static SLJIT_INLINE void decrease_call_count(compiler_common *common)
+static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
-OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
+OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
}
@@ -1781,7 +2040,7 @@ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
#endif
-add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
+add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
}
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
@@ -1795,18 +2054,20 @@ static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
DEFINE_COMPILER;
struct sljit_label *loop;
int i;
+
/* At this point we can freely use all temporary registers. */
+SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
{
- for (i = 0; i < length; i++)
+ for (i = 1; i < length; i++)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
}
else
{
- GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
+ GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
+ OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
@@ -1814,11 +2075,69 @@ else
}
}
+static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+SLJIT_ASSERT(length > 1);
+/* OVECTOR(1) contains the "string begin - 1" constant. */
+if (length > 2)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+if (length < 8)
+ {
+ for (i = 2; i < length; i++)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
+ }
+else
+ {
+ GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+ loop = LABEL();
+ OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, loop);
+ }
+
+OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
+}
+
+static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
+{
+while (current != NULL)
+ {
+ switch (current[-2])
+ {
+ case type_then_trap:
+ break;
+
+ case type_mark:
+ if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
+ return current[-4];
+ break;
+
+ default:
+ SLJIT_ASSERT_STOP();
+ break;
+ }
+ current = (sljit_sw*)current[-1];
+ }
+return -1;
+}
+
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
-struct sljit_jump *earlyexit;
+struct sljit_jump *early_quit;
/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
@@ -1827,14 +2146,14 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
-OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
+OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
/* Unlikely, but possible */
-earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
+early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
@@ -1845,7 +2164,7 @@ OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
-JUMPHERE(earlyexit);
+JUMPHERE(early_quit);
/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
@@ -1867,18 +2186,29 @@ else
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
+struct sljit_jump *jump;
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
-SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
+SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
+ && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
-OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
-CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
+OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
+CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
+
+jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
+JUMPHERE(jump);
+
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
@@ -2040,7 +2370,7 @@ return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
static void check_partial(compiler_common *common, BOOL force)
{
-/* Checks whether a partial matching is occured. Does not modify registers. */
+/* Checks whether a partial matching is occurred. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *jump = NULL;
@@ -2055,7 +2385,7 @@ else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
else
{
if (common->partialmatchlabel != NULL)
@@ -2068,35 +2398,34 @@ if (jump != NULL)
JUMPHERE(jump);
}
-static struct sljit_jump *check_str_end(compiler_common *common)
+static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *jump;
-struct sljit_jump *nohit;
-struct sljit_jump *return_value;
if (common->mode == JIT_COMPILE)
- return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ {
+ add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+ return;
+ }
jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
{
- nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
- JUMPHERE(nohit);
- return_value = JUMP(SLJIT_JUMP);
+ add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+ add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
}
else
{
- return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
if (common->partialmatchlabel != NULL)
JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
}
JUMPHERE(jump);
-return return_value;
}
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
@@ -2115,7 +2444,7 @@ jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
}
else
@@ -2573,7 +2902,7 @@ DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
pcre_uint32 chars[MAX_N_CHARS * 2];
-pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
+pcre_uchar *cc = common->start + 1 + LINK_SIZE;
int location = 0;
pcre_int32 len, c, bit, caseless;
int must_stop;
@@ -2696,10 +3025,10 @@ if (firstline)
{
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1);
+ OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
}
else
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
+ OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
start = LABEL();
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
@@ -2728,7 +3057,7 @@ JUMPHERE(quit);
if (firstline)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
- OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
+ OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
return TRUE;
}
@@ -2877,16 +3206,24 @@ if (firstline)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
+
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
-struct sljit_jump *found;
+struct sljit_jump *found = NULL;
+jump_list *matches = NULL;
+pcre_uint8 inverted_start_bits[32];
+int i;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif
+for (i = 0; i < 32; ++i)
+ inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
+
if (firstline)
{
SLJIT_ASSERT(common->first_line_end != 0);
@@ -2901,17 +3238,21 @@ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
if (common->utf)
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif
+
+if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
+ {
#ifndef COMPILE_PCRE8
-jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
-JUMPHERE(jump);
+ jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
+ JUMPHERE(jump);
#endif
-OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
-OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
-OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
-found = JUMP(SLJIT_C_NOT_ZERO);
+ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
+ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ found = JUMP(SLJIT_C_NOT_ZERO);
+ }
#ifdef SUPPORT_UTF
if (common->utf)
@@ -2939,7 +3280,10 @@ if (common->utf)
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
-JUMPHERE(found);
+if (found != NULL)
+ JUMPHERE(found);
+if (matches != NULL)
+ set_jumps(matches, LABEL());
JUMPHERE(quit);
if (firstline)
@@ -3022,7 +3366,9 @@ GET_LOCAL_BASE(TMP3, 0, 0);
/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
-jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
+
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
@@ -3030,31 +3376,14 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
JUMPHERE(jump);
-jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
+jump = JUMP(SLJIT_C_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
-jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
-/* Set string begin. */
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
-JUMPTO(SLJIT_JUMP, mainloop);
-
-JUMPHERE(jump);
-if (common->mark_ptr != 0)
- {
- jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
- JUMPTO(SLJIT_JUMP, mainloop);
-
- JUMPHERE(jump);
- }
-
-/* Unknown command. */
+OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
@@ -3063,6 +3392,7 @@ static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
+jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif
@@ -3120,7 +3450,7 @@ else
JUMPHERE(skipread);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
-skipread = check_str_end(common);
+check_str_end(common, &skipread_list);
peek_char(common);
/* Testing char type. This is a code duplication. */
@@ -3161,7 +3491,7 @@ else
JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
}
-JUMPHERE(skipread);
+set_jumps(skipread_list, LABEL());
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
@@ -3216,7 +3546,9 @@ switch(ranges[0])
}
return TRUE;
}
- if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
+ if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
+ && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
+ && is_powerof2(ranges[4] - ranges[2]))
{
if (readch)
read_char(common);
@@ -3481,7 +3813,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
#if defined SUPPORT_UTF && defined SUPPORT_UCP
-static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
+static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
pcre_uint32 c1, c2;
@@ -3577,7 +3909,7 @@ do
#endif
context->length -= IN_UCHARS(1);
-#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
/* Unaligned read is supported. */
if (othercasebit != 0 && othercasechar == cc)
@@ -3594,27 +3926,18 @@ do
#if defined COMPILE_PCRE8
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
-#elif defined COMPILE_PCRE16
+#else
if (context->ucharptr >= 2 || context->length == 0)
-#elif defined COMPILE_PCRE32
- if (1 /* context->ucharptr >= 1 || context->length == 0 */)
#endif
{
-#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (context->length >= 4)
OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#if defined COMPILE_PCRE8
else if (context->length >= 2)
OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#if defined COMPILE_PCRE8
else if (context->length >= 1)
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#elif defined COMPILE_PCRE16
- else if (context->length >= 2)
- OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#endif /* COMPILE_PCRE[8|16] */
-#elif defined COMPILE_PCRE32
- OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#endif /* COMPILE_PCRE[8|16|32] */
+#endif /* COMPILE_PCRE8 */
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
switch(context->ucharptr)
@@ -3625,7 +3948,6 @@ do
add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
break;
-#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
case 2 / sizeof(pcre_uchar):
if (context->oc.asushort != 0)
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
@@ -3640,8 +3962,6 @@ do
break;
#endif
-#endif /* COMPILE_PCRE[8|16] */
-
default:
SLJIT_ASSERT_STOP();
break;
@@ -3651,8 +3971,8 @@ do
#else
- /* Unaligned read is unsupported. */
- if (context->length > 0)
+ /* Unaligned read is unsupported or in 32 bit mode. */
+ if (context->length >= 1)
OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
@@ -3705,14 +4025,15 @@ DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
pcre_int32 c, charoffset;
-const pcre_uint32 *other_cases;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
+
#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1, scriptreg = TMP1;
+const pcre_uint32 *other_cases;
pcre_int32 typeoffset;
#endif
@@ -3808,11 +4129,15 @@ while (*cc != XCL_END)
case PT_SPACE:
case PT_PXSPACE:
case PT_WORD:
+ case PT_PXGRAPH:
+ case PT_PXPRINT:
+ case PT_PXPUNCT:
needstype = TRUE;
needschar = TRUE;
break;
case PT_CLIST:
+ case PT_UCNC:
needschar = TRUE;
break;
@@ -3994,16 +4319,15 @@ while (*cc != XCL_END)
case PT_SPACE:
case PT_PXSPACE:
- if (*cc == PT_SPACE)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
- jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
- }
SET_CHAR_OFFSET(9);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
- if (*cc == PT_SPACE)
- JUMPHERE(jump);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
SET_TYPE_OFFSET(ucp_Zl);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
@@ -4014,7 +4338,7 @@ while (*cc != XCL_END)
case PT_WORD:
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
- /* ... fall through */
+ /* Fall through. */
case PT_ALNUM:
SET_TYPE_OFFSET(ucp_Ll);
@@ -4078,6 +4402,84 @@ while (*cc != XCL_END)
}
jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
break;
+
+ case PT_UCNC:
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+ SET_CHAR_OFFSET(0xa0);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+ SET_CHAR_OFFSET(0);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
+ jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+ break;
+
+ case PT_PXGRAPH:
+ /* C and Z groups are the farthest two groups. */
+ SET_TYPE_OFFSET(ucp_Ll);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
+
+ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+ /* In case of ucp_Cf, we overwrite the result. */
+ SET_CHAR_OFFSET(0x2066);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+ JUMPHERE(jump);
+ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+ break;
+
+ case PT_PXPRINT:
+ /* C and Z groups are the farthest two groups. */
+ SET_TYPE_OFFSET(ucp_Ll);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
+
+ jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+ /* In case of ucp_Cf, we overwrite the result. */
+ SET_CHAR_OFFSET(0x2066);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+ JUMPHERE(jump);
+ jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+ break;
+
+ case PT_PXPUNCT:
+ SET_TYPE_OFFSET(ucp_Sc);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+
+ SET_TYPE_OFFSET(ucp_Pc);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+ jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+ break;
}
cc += 2;
}
@@ -4103,6 +4505,7 @@ int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
+jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
@@ -4171,15 +4574,15 @@ switch(type)
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+ end_list = NULL;
if (common->mode != JIT_PARTIAL_HARD_COMPILE)
- jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
- jump[1] = check_str_end(common);
+ check_str_end(common, &end_list);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
- if (jump[1] != NULL)
- JUMPHERE(jump[1]);
+ set_jumps(end_list, LABEL());
JUMPHERE(jump[0]);
}
else
@@ -4238,19 +4641,20 @@ switch(type)
read_char(common);
jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* We don't need to handle soft partial matching case. */
+ end_list = NULL;
if (common->mode != JIT_PARTIAL_HARD_COMPILE)
- jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
- jump[1] = check_str_end(common);
+ check_str_end(common, &end_list);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump[3] = JUMP(SLJIT_JUMP);
+ jump[2] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
+ set_jumps(end_list, LABEL());
JUMPHERE(jump[1]);
JUMPHERE(jump[2]);
- JUMPHERE(jump[3]);
return cc;
case OP_NOT_HSPACE:
@@ -4714,28 +5118,6 @@ if (context.length > 0)
return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
}
-static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
-{
-DEFINE_COMPILER;
-int offset = GET2(cc, 1) << 1;
-
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
-if (!common->jscript_compat)
- {
- if (backtracks == NULL)
- {
- /* OVECTOR(1) contains the "string begin - 1" constant. */
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
- return JUMP(SLJIT_C_NOT_ZERO);
- }
- add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
- }
-return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
-}
-
/* Forward definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
@@ -4768,24 +5150,65 @@ static void compile_backtrackingpath(compiler_common *, struct backtrack_common
#define BACKTRACK_AS(type) ((type *)backtrack)
-static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
+static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
+/* The OVECTOR offset goes to TMP2. */
DEFINE_COMPILER;
-int offset = GET2(cc, 1) << 1;
+int count = GET2(cc, 1 + IMM2_SIZE);
+pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
+unsigned int offset;
+jump_list *found = NULL;
+
+SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+
+count--;
+while (count-- > 0)
+ {
+ offset = GET2(slot, 0) << 1;
+ GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+ add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+ slot += common->name_entry_size;
+ }
+
+offset = GET2(slot, 0) << 1;
+GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+if (backtracks != NULL && !common->jscript_compat)
+ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+
+set_jumps(found, LABEL());
+}
+
+static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
+{
+DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
-/* OVECTOR(1) contains the "string begin - 1" constant. */
-if (withchecks && !common->jscript_compat)
- add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+if (ref)
+ {
+ offset = GET2(cc, 1) << 1;
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+ /* OVECTOR(1) contains the "string begin - 1" constant. */
+ if (withchecks && !common->jscript_compat)
+ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+ }
+else
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && *cc == OP_REFI)
{
SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ if (ref)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ else
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+
if (withchecks)
jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
@@ -4810,7 +5233,11 @@ if (common->utf && *cc == OP_REFI)
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
{
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+ if (ref)
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+ else
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+
if (withchecks)
jump = JUMP(SLJIT_C_ZERO);
@@ -4847,14 +5274,15 @@ if (jump != NULL)
else
JUMPHERE(jump);
}
-return cc + 1 + IMM2_SIZE;
}
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
+int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
@@ -4864,7 +5292,13 @@ BOOL minimize;
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+if (ref)
+ offset = GET2(cc, 1) << 1;
+else
+ cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];
+
+SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
{
@@ -4902,25 +5336,52 @@ if (!minimize)
if (min == 0)
{
allocate_stack(common, 2);
+ if (ref)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
/* Temporary release of STR_PTR. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- zerolength = compile_ref_checks(common, ccbegin, NULL);
+ /* Handles both invalid and empty cases. Since the minimum repeat,
+ is zero the invalid case is basically the same as an empty case. */
+ if (ref)
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ else
+ {
+ compile_dnref_search(common, ccbegin, NULL);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ }
/* Restore if not zero length. */
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
}
else
{
allocate_stack(common, 1);
+ if (ref)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
+ if (ref)
+ {
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ }
+ else
+ {
+ compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ }
}
if (min > 1 || max > 1)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
label = LABEL();
+ if (!ref)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
if (min > 1 || max > 1)
@@ -4951,28 +5412,56 @@ if (!minimize)
JUMPHERE(zerolength);
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
- decrease_call_count(common);
+ count_match(common);
return cc;
}
-allocate_stack(common, 2);
+allocate_stack(common, ref ? 2 : 3);
+if (ref)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
if (min == 0)
{
- zerolength = compile_ref_checks(common, ccbegin, NULL);
+ /* Handles both invalid and empty cases. Since the minimum repeat,
+ is zero the invalid case is basically the same as an empty case. */
+ if (ref)
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ else
+ {
+ compile_dnref_search(common, ccbegin, NULL);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ }
+ /* Length is non-zero, we can match real repeats. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
jump = JUMP(SLJIT_JUMP);
}
else
- zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
+ {
+ if (ref)
+ {
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ }
+ else
+ {
+ compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+ zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ }
+ }
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
+if (!ref)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
@@ -4990,7 +5479,7 @@ if (jump != NULL)
JUMPHERE(jump);
JUMPHERE(zerolength);
-decrease_call_count(common);
+count_match(common);
return cc;
}
@@ -5000,9 +5489,21 @@ DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry = common->entries;
recurse_entry *prev = NULL;
-int start = GET(cc, 1);
+sljit_sw start = GET(cc, 1);
+pcre_uchar *start_cc;
+BOOL needs_control_head;
PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
+
+/* Inlining simple patterns. */
+if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
+ {
+ start_cc = common->start + start;
+ compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
+ BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
+ return cc + 1 + LINK_SIZE;
+ }
+
while (entry != NULL)
{
if (entry->start == start)
@@ -5051,10 +5552,111 @@ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_
return cc + 1 + LINK_SIZE;
}
+static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
+{
+const pcre_uchar *begin = arguments->begin;
+int *offset_vector = arguments->offsets;
+int offset_count = arguments->offset_count;
+int i;
+
+if (PUBL(callout) == NULL)
+ return 0;
+
+callout_block->version = 2;
+callout_block->callout_data = arguments->callout_data;
+
+/* Offsets in subject. */
+callout_block->subject_length = arguments->end - arguments->begin;
+callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
+callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
+#if defined COMPILE_PCRE8
+callout_block->subject = (PCRE_SPTR)begin;
+#elif defined COMPILE_PCRE16
+callout_block->subject = (PCRE_SPTR16)begin;
+#elif defined COMPILE_PCRE32
+callout_block->subject = (PCRE_SPTR32)begin;
+#endif
+
+/* Convert and copy the JIT offset vector to the offset_vector array. */
+callout_block->capture_top = 0;
+callout_block->offset_vector = offset_vector;
+for (i = 2; i < offset_count; i += 2)
+ {
+ offset_vector[i] = jit_ovector[i] - begin;
+ offset_vector[i + 1] = jit_ovector[i + 1] - begin;
+ if (jit_ovector[i] >= begin)
+ callout_block->capture_top = i;
+ }
+
+callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
+if (offset_count > 0)
+ offset_vector[0] = -1;
+if (offset_count > 1)
+ offset_vector[1] = -1;
+return (*PUBL(callout))(callout_block);
+}
+
+/* Aligning to 8 byte. */
+#define CALLOUT_ARG_SIZE \
+ (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
+
+#define CALLOUT_ARG_OFFSET(arg) \
+ (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
+
+static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+SLJIT_ASSERT(common->capture_last_ptr != 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
+
+/* These pointer sized fields temporarly stores internal variables. */
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
+
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
+
+/* Needed to save important temporary registers. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
+GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
+sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
+OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+/* Check return value. */
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
+if (common->forced_quit_label == NULL)
+ add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
+else
+ JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
+return cc + 2 + 2 * LINK_SIZE;
+}
+
+#undef CALLOUT_ARG_SIZE
+#undef CALLOUT_ARG_OFFSET
+
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
+int extrasize;
+BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
@@ -5064,13 +5666,20 @@ jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
-struct sljit_label *save_quitlabel = common->quitlabel;
-struct sljit_label *save_acceptlabel = common->acceptlabel;
+BOOL save_local_exit = common->local_exit;
+BOOL save_positive_assert = common->positive_assert;
+then_trap_backtrack *save_then_trap = common->then_trap;
+struct sljit_label *save_quit_label = common->quit_label;
+struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
+jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;
+/* Assert captures then. */
+common->then_trap = NULL;
+
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
{
SLJIT_ASSERT(!conditional);
@@ -5079,7 +5688,7 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
}
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
-framesize = get_framesize(common, cc, FALSE);
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
@@ -5098,27 +5707,56 @@ if (bra == OP_BRAMINZERO)
if (framesize < 0)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
- allocate_stack(common, 1);
+ extrasize = needs_control_head ? 2 : 1;
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+ allocate_stack(common, extrasize);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ if (needs_control_head)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ }
}
else
{
- allocate_stack(common, framesize + 2);
+ extrasize = needs_control_head ? 3 : 2;
+ allocate_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
+ OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- init_frame(common, ccbegin, framesize + 1, 2, FALSE);
+ if (needs_control_head)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+ }
+ else
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
}
memset(&altbacktrack, 0, sizeof(backtrack_common));
-common->quitlabel = NULL;
-common->quit = NULL;
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ {
+ /* Negative assert is stronger than positive assert. */
+ common->local_exit = TRUE;
+ common->quit_label = NULL;
+ common->quit = NULL;
+ common->positive_assert = FALSE;
+ }
+else
+ common->positive_assert = TRUE;
+common->positive_assert_quit = NULL;
+
while (1)
{
- common->acceptlabel = NULL;
+ common->accept_label = NULL;
common->accept = NULL;
altbacktrack.top = NULL;
altbacktrack.topbacktracks = NULL;
@@ -5130,45 +5768,64 @@ while (1)
compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
- common->quitlabel = save_quitlabel;
- common->acceptlabel = save_acceptlabel;
- common->quit = save_quit;
+ if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ {
+ common->local_exit = save_local_exit;
+ common->quit_label = save_quit_label;
+ common->quit = save_quit;
+ }
+ common->positive_assert = save_positive_assert;
+ common->then_trap = save_then_trap;
+ common->accept_label = save_accept_label;
+ common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return NULL;
}
- common->acceptlabel = LABEL();
+ common->accept_label = LABEL();
if (common->accept != NULL)
- set_jumps(common->accept, common->acceptlabel);
+ set_jumps(common->accept, common->accept_label);
/* Reset stack. */
if (framesize < 0)
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- else {
+ {
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ else
+ free_stack(common, extrasize);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+ }
+ else
+ {
if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+ if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
}
else
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
}
- }
+ }
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
{
/* We know that STR_PTR was stored on the top of the stack. */
if (conditional)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
else if (bra == OP_BRAZERO)
{
if (framesize < 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
else
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
}
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
@@ -5185,9 +5842,16 @@ while (1)
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
- common->quitlabel = save_quitlabel;
- common->acceptlabel = save_acceptlabel;
- common->quit = save_quit;
+ if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ {
+ common->local_exit = save_local_exit;
+ common->quit_label = save_quit_label;
+ common->quit = save_quit;
+ }
+ common->positive_assert = save_positive_assert;
+ common->then_trap = save_then_trap;
+ common->accept_label = save_accept_label;
+ common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return NULL;
}
@@ -5199,9 +5863,33 @@ while (1)
ccbegin = cc;
cc += GET(cc, 1);
}
+
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ {
+ SLJIT_ASSERT(common->positive_assert_quit == NULL);
+ /* Makes the check less complicated below. */
+ common->positive_assert_quit = common->quit;
+ }
+
/* None of them matched. */
-if (common->quit != NULL)
- set_jumps(common->quit, LABEL());
+if (common->positive_assert_quit != NULL)
+ {
+ jump = JUMP(SLJIT_JUMP);
+ set_jumps(common->positive_assert_quit, LABEL());
+ SLJIT_ASSERT(framesize != no_stack);
+ if (framesize < 0)
+ OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
+ else
+ {
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+ }
+ JUMPHERE(jump);
+ }
+
+if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
@@ -5213,21 +5901,25 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
/* The topmost item should be 0. */
if (bra == OP_BRAZERO)
+ {
+ if (extrasize == 2)
+ free_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ }
else
- free_stack(common, 1);
+ free_stack(common, extrasize);
}
else
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
/* The topmost item should be 0. */
if (bra == OP_BRAZERO)
{
- free_stack(common, framesize + 1);
+ free_stack(common, framesize + extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else
- free_stack(common, framesize + 2);
+ free_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
}
jump = JUMP(SLJIT_JUMP);
@@ -5239,10 +5931,14 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
if (framesize < 0)
{
/* We know that STR_PTR was stored on the top of the stack. */
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
/* Keep the STR_PTR on the top of the stack. */
if (bra == OP_BRAZERO)
+ {
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ if (extrasize == 2)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ }
else if (bra == OP_BRAMINZERO)
{
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
@@ -5255,21 +5951,30 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
}
else
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
+ if (extrasize == 2)
+ {
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ if (bra == OP_BRAMINZERO)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ }
+ else
+ {
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
+ }
}
}
if (bra == OP_BRAZERO)
{
backtrack->matchingpath = LABEL();
- sljit_set_label(jump, backtrack->matchingpath);
+ SET_LABEL(jump, backtrack->matchingpath);
}
else if (bra == OP_BRAMINZERO)
{
@@ -5291,22 +5996,26 @@ else
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (bra != OP_BRA)
+ {
+ if (extrasize == 2)
+ free_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ }
else
- free_stack(common, 1);
+ free_stack(common, extrasize);
}
else
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
/* The topmost item should be 0. */
if (bra != OP_BRA)
{
- free_stack(common, framesize + 1);
+ free_stack(common, framesize + extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else
- free_stack(common, framesize + 2);
+ free_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
}
@@ -5326,121 +6035,89 @@ else
}
}
-common->quitlabel = save_quitlabel;
-common->acceptlabel = save_acceptlabel;
-common->quit = save_quit;
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ {
+ common->local_exit = save_local_exit;
+ common->quit_label = save_quit_label;
+ common->quit = save_quit;
+ }
+common->positive_assert = save_positive_assert;
+common->then_trap = save_then_trap;
+common->accept_label = save_accept_label;
+common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
-static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
+static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
-int condition = FALSE;
-pcre_uchar *slotA = name_table;
-pcre_uchar *slotB;
-sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
-sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
-sljit_sw no_capture;
-int i;
-
-locals += refno & 0xff;
-refno >>= 8;
-no_capture = locals[1];
+DEFINE_COMPILER;
+int stacksize;
-for (i = 0; i < name_count; i++)
+if (framesize < 0)
{
- if (GET2(slotA, 0) == refno) break;
- slotA += name_entry_size;
- }
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ else
+ {
+ stacksize = needs_control_head ? 1 : 0;
+ if (ket != OP_KET || has_alternatives)
+ stacksize++;
+ free_stack(common, stacksize);
+ }
-if (i < name_count)
- {
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
- slotB = slotA;
- while (slotB > name_table)
+ /* TMP2 which is set here used by OP_KETRMAX below. */
+ if (ket == OP_KETRMAX)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
+ else if (ket == OP_KETRMIN)
{
- slotB -= name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = locals[GET2(slotB, 0) << 1] != no_capture;
- if (condition) break;
- }
- else break;
+ /* Move the STR_PTR to the private_data_ptr. */
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
}
+ }
+else
+ {
+ stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
+ OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
- /* Scan up for duplicates */
- if (!condition)
+ if (ket == OP_KETRMAX)
{
- slotB = slotA;
- for (i++; i < name_count; i++)
- {
- slotB += name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = locals[GET2(slotB, 0) << 1] != no_capture;
- if (condition) break;
- }
- else break;
- }
+ /* TMP2 which is set here used by OP_KETRMAX below. */
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
}
}
-return condition;
+if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
}
-static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
+static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
-int condition = FALSE;
-pcre_uchar *slotA = name_table;
-pcre_uchar *slotB;
-sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
-sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
-sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
-sljit_uw i;
+DEFINE_COMPILER;
-for (i = 0; i < name_count; i++)
+if (common->capture_last_ptr != 0)
{
- if (GET2(slotA, 0) == recno) break;
- slotA += name_entry_size;
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+ stacksize++;
}
-
-if (i < name_count)
+if (common->optimized_cbracket[offset >> 1] == 0)
{
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
-
- slotB = slotA;
- while (slotB > name_table)
- {
- slotB -= name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == group_num;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < name_count; i++)
- {
- slotB += name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == group_num;
- if (condition) break;
- }
- else break;
- }
- }
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+ stacksize += 2;
}
-return condition;
+return stacksize;
}
/*
@@ -5504,17 +6181,21 @@ backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
-int stacksize;
+int i, stacksize;
+int repeat_ptr = 0, repeat_length = 0;
+int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
+pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
-struct sljit_label *rmaxlabel = NULL;
-struct sljit_jump *braminzerojump = NULL;
+struct sljit_label *rmax_label = NULL;
+struct sljit_jump *braminzero = NULL;
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
@@ -5527,35 +6208,36 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
opcode = *cc;
ccbegin = cc;
-matchingpath = ccbegin + 1 + LINK_SIZE;
+matchingpath = bracketend(cc) - 1 - LINK_SIZE;
+ket = *matchingpath;
+if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
+ {
+ repeat_ptr = PRIVATE_DATA(matchingpath);
+ repeat_length = PRIVATE_DATA(matchingpath + 1);
+ repeat_type = PRIVATE_DATA(matchingpath + 2);
+ repeat_count = PRIVATE_DATA(matchingpath + 3);
+ SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
+ if (repeat_type == OP_UPTO)
+ ket = OP_KETRMAX;
+ if (repeat_type == OP_MINUPTO)
+ ket = OP_KETRMIN;
+ }
if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
{
/* Drop this bracket_backtrack. */
parent->top = backtrack->prev;
- return bracketend(cc);
+ return matchingpath + 1 + LINK_SIZE + repeat_length;
}
-ket = *(bracketend(cc) - 1 - LINK_SIZE);
+matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
-if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
- {
- has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
- if (*matchingpath == OP_NRREF)
- {
- stacksize = GET2(matchingpath, 1);
- if (common->currententry == NULL || stacksize == RREF_ANY)
- has_alternatives = FALSE;
- else if (common->currententry->start == 0)
- has_alternatives = stacksize != 0;
- else
- has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
- }
- }
+if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
+ has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
@@ -5586,12 +6268,12 @@ else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
if (opcode == OP_ONCE)
- BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
+ BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
}
/* Instructions before the first alternative. */
stacksize = 0;
-if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
stacksize++;
if (bra == OP_BRAZERO)
stacksize++;
@@ -5600,7 +6282,7 @@ if (stacksize > 0)
allocate_stack(common, stacksize);
stacksize = 0;
-if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
{
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
stacksize++;
@@ -5616,7 +6298,7 @@ if (bra == OP_BRAMINZERO)
if (ket != OP_KETRMIN)
{
free_stack(common, 1);
- braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+ braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
}
else
{
@@ -5631,13 +6313,13 @@ if (bra == OP_BRAMINZERO)
if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
{
/* When we come from outside, private_data_ptr contains the previous STR_PTR. */
- braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
}
else
{
/* Except when the whole stack frame must be saved. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
+ braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
}
JUMPHERE(skip);
}
@@ -5650,77 +6332,106 @@ if (bra == OP_BRAMINZERO)
}
}
+if (repeat_type != 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
+ if (repeat_type == OP_EXACT)
+ rmax_label = LABEL();
+ }
+
if (ket == OP_KETRMIN)
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
if (ket == OP_KETRMAX)
{
- rmaxlabel = LABEL();
- if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
- BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
+ rmax_label = LABEL();
+ if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
+ BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
}
/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
{
+ stacksize = 0;
+ if (needs_control_head)
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ stacksize++;
+ }
+
if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
{
- /* Neither capturing brackets nor recursions are not found in the block. */
+ /* Neither capturing brackets nor recursions are found in the block. */
if (ket == OP_KETRMIN)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ stacksize += 2;
+ if (!needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
}
- else if (ket == OP_KETRMAX || has_alternatives)
+ else
{
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+ if (ket == OP_KETRMAX || has_alternatives)
+ stacksize++;
}
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+
+ if (stacksize > 0)
+ allocate_stack(common, stacksize);
+
+ stacksize = 0;
+ if (needs_control_head)
+ {
+ stacksize++;
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ }
+
+ if (ket == OP_KETRMIN)
+ {
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+ if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
+ }
+ else if (ket == OP_KETRMAX || has_alternatives)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
}
else
{
- if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
+ if (ket != OP_KET || has_alternatives)
+ stacksize++;
+
+ stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
+ allocate_stack(common, stacksize);
+
+ if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+
+ stacksize = needs_control_head ? 1 : 0;
+ if (ket != OP_KET || has_alternatives)
{
- allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
+ stacksize++;
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
}
else
{
- allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
- init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
}
+ init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
}
}
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
{
/* Saving the previous values. */
- if (common->optimized_cbracket[offset >> 1] == 0)
- {
- allocate_stack(common, 3);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
- }
- else
+ if (common->optimized_cbracket[offset >> 1] != 0)
{
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
allocate_stack(common, 2);
@@ -5730,6 +6441,13 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
}
+ else
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ }
}
else if (opcode == OP_SBRA || opcode == OP_SCOND)
{
@@ -5756,47 +6474,73 @@ if (opcode == OP_COND || opcode == OP_SCOND)
CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
matchingpath += 1 + IMM2_SIZE;
}
- else if (*matchingpath == OP_NCREF)
+ else if (*matchingpath == OP_DNCREF)
{
SLJIT_ASSERT(has_alternatives);
- stacksize = GET2(matchingpath, 1);
- jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
-
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
- GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
- sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
- add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
- JUMPHERE(jump);
- matchingpath += 1 + IMM2_SIZE;
+ i = GET2(matchingpath, 1 + IMM2_SIZE);
+ slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+ OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+ slot += common->name_entry_size;
+ i--;
+ while (i-- > 0)
+ {
+ OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+ OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
+ slot += common->name_entry_size;
+ }
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+ add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
+ matchingpath += 1 + 2 * IMM2_SIZE;
}
- else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
+ else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
{
/* Never has other case. */
BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
+ SLJIT_ASSERT(!has_alternatives);
- stacksize = GET2(matchingpath, 1);
- if (common->currententry == NULL)
- stacksize = 0;
- else if (stacksize == RREF_ANY)
- stacksize = 1;
- else if (common->currententry->start == 0)
- stacksize = stacksize == 0;
- else
- stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
-
- if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
+ if (*matchingpath == OP_RREF)
{
- SLJIT_ASSERT(!has_alternatives);
+ stacksize = GET2(matchingpath, 1);
+ if (common->currententry == NULL)
+ stacksize = 0;
+ else if (stacksize == RREF_ANY)
+ stacksize = 1;
+ else if (common->currententry->start == 0)
+ stacksize = stacksize == 0;
+ else
+ stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+
if (stacksize != 0)
matchingpath += 1 + IMM2_SIZE;
+ }
+ else
+ {
+ if (common->currententry == NULL || common->currententry->start == 0)
+ stacksize = 0;
else
{
+ stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
+ slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+ i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+ while (stacksize > 0)
+ {
+ if ((int)GET2(slot, 0) == i)
+ break;
+ slot += common->name_entry_size;
+ stacksize--;
+ }
+ }
+
+ if (stacksize != 0)
+ matchingpath += 1 + 2 * IMM2_SIZE;
+ }
+
+ /* The stacksize == 0 is a common "else" case. */
+ if (stacksize == 0)
+ {
if (*cc == OP_ALT)
{
matchingpath = cc + 1 + LINK_SIZE;
@@ -5805,24 +6549,6 @@ if (opcode == OP_COND || opcode == OP_SCOND)
else
matchingpath = cc;
}
- }
- else
- {
- SLJIT_ASSERT(has_alternatives);
-
- stacksize = GET2(matchingpath, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
- GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
- sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
- add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
- matchingpath += 1 + IMM2_SIZE;
- }
}
else
{
@@ -5843,34 +6569,24 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
if (opcode == OP_ONCE)
- {
- if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- /* TMP2 which is set here used by OP_KETRMAX below. */
- if (ket == OP_KETRMAX)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
- else if (ket == OP_KETRMIN)
- {
- /* Move the STR_PTR to the private_data_ptr. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
- }
- }
- else
- {
- stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
- if (ket == OP_KETRMAX)
- {
- /* TMP2 which is set here used by OP_KETRMAX below. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- }
- }
+ match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
stacksize = 0;
+if (repeat_type == OP_MINUPTO)
+ {
+ /* We need to preserve the counter. TMP2 will be used below. */
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+ stacksize++;
+ }
if (ket != OP_KET || bra != OP_BRA)
stacksize++;
+if (offset != 0)
+ {
+ if (common->capture_last_ptr != 0)
+ stacksize++;
+ if (common->optimized_cbracket[offset >> 1] == 0)
+ stacksize += 2;
+ }
if (has_alternatives && opcode != OP_ONCE)
stacksize++;
@@ -5878,17 +6594,25 @@ if (stacksize > 0)
allocate_stack(common, stacksize);
stacksize = 0;
-if (ket != OP_KET)
+if (repeat_type == OP_MINUPTO)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+ /* TMP2 was set above. */
+ OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
stacksize++;
}
-else if (bra != OP_BRA)
+
+if (ket != OP_KET || bra != OP_BRA)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+ if (ket != OP_KET)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+ else
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
stacksize++;
}
+if (offset != 0)
+ stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
if (has_alternatives)
{
if (opcode != OP_ONCE)
@@ -5898,36 +6622,58 @@ if (has_alternatives)
}
/* Must be after the matchingpath label. */
-if (offset != 0)
+if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
}
if (ket == OP_KETRMAX)
{
- if (opcode == OP_ONCE || opcode >= OP_SBRA)
+ if (repeat_type != 0)
+ {
+ if (has_alternatives)
+ BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
+ /* Drop STR_PTR for greedy plus quantifier. */
+ if (opcode != OP_ONCE)
+ free_stack(common, 1);
+ }
+ else if (opcode == OP_ONCE || opcode >= OP_SBRA)
{
if (has_alternatives)
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
/* Checking zero-length iteration. */
if (opcode != OP_ONCE)
{
- CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
/* Drop STR_PTR for greedy plus quantifier. */
if (bra != OP_BRAZERO)
free_stack(common, 1);
}
else
/* TMP2 must contain the starting STR_PTR. */
- CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
}
else
- JUMPTO(SLJIT_JUMP, rmaxlabel);
+ JUMPTO(SLJIT_JUMP, rmax_label);
BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
}
+if (repeat_type == OP_EXACT)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
+ }
+else if (repeat_type == OP_UPTO)
+ {
+ /* We need to preserve the counter. */
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ }
+
if (bra == OP_BRAZERO)
BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
@@ -5935,9 +6681,9 @@ if (bra == OP_BRAMINZERO)
{
/* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
- if (braminzerojump != NULL)
+ if (braminzero != NULL)
{
- JUMPHERE(braminzerojump);
+ JUMPHERE(braminzero);
/* We need to release the end pointer to perform the
backtrack for the zero-length iteration. When
framesize is < 0, OP_ONCE will do the release itself. */
@@ -5953,13 +6699,17 @@ if (bra == OP_BRAMINZERO)
}
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
- decrease_call_count(common);
+ count_match(common);
/* Skip the other alternatives. */
while (*cc == OP_ALT)
cc += GET(cc, 1);
cc += 1 + LINK_SIZE;
-return cc;
+
+/* Temporarily encoding the needs_control_head in framesize. */
+if (opcode == OP_ONCE)
+ BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+return cc + repeat_length;
}
static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
@@ -5969,12 +6719,13 @@ backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
+BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
-int stack;
+int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;
@@ -6012,59 +6763,104 @@ switch(opcode)
break;
}
-framesize = get_framesize(common, cc, FALSE);
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
{
- stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
+ if (offset != 0)
+ {
+ stacksize = 2;
+ if (common->capture_last_ptr != 0)
+ stacksize++;
+ }
+ else
+ stacksize = 1;
+
+ if (needs_control_head)
+ stacksize++;
if (!zero)
stacksize++;
+
BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
allocate_stack(common, stacksize);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
- if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+ stack = 0;
+ if (offset != 0)
{
+ stack = 2;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+ if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ if (common->capture_last_ptr != 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+ stack = 3;
+ }
}
else
+ {
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ stack = 1;
+ }
+ if (needs_control_head)
+ stack++;
if (!zero)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
+ if (needs_control_head)
+ {
+ stack--;
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
+ }
}
else
{
stacksize = framesize + 1;
if (!zero)
stacksize++;
- if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
+ if (needs_control_head)
+ stacksize++;
+ if (offset == 0)
stacksize++;
BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
- allocate_stack(common, stacksize);
+ allocate_stack(common, stacksize);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+ if (needs_control_head)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
+
stack = 0;
if (!zero)
{
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
+ stack = 1;
+ }
+ if (needs_control_head)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
stack++;
}
- if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
+ if (offset == 0)
{
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
stack++;
}
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
- init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
+ init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
+ stack -= 1 + (offset == 0);
}
-if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+if (offset != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
loop = LABEL();
@@ -6080,13 +6876,16 @@ while (*cc != OP_KETRPOS)
if (framesize < 0)
{
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+ if (offset != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+ if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
}
else
@@ -6104,12 +6903,14 @@ while (*cc != OP_KETRPOS)
}
else
{
- if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+ if (offset != 0)
{
OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+ if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
}
else
@@ -6132,6 +6933,10 @@ while (*cc != OP_KETRPOS)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
}
+
+ if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
+
JUMPTO(SLJIT_JUMP, loop);
flush_stubs(common);
@@ -6142,14 +6947,14 @@ while (*cc != OP_KETRPOS)
if (framesize < 0)
{
- if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+ if (offset != 0)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
else
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
}
else
{
- if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
+ if (offset != 0)
{
/* Last alternative. */
if (*cc == OP_KETRPOS)
@@ -6168,6 +6973,8 @@ while (*cc != OP_KETRPOS)
ccbegin = cc + 1 + LINK_SIZE;
}
+/* We don't have to restore the control head in case of a failed match. */
+
backtrack->topbacktracks = NULL;
if (!zero)
{
@@ -6179,11 +6986,11 @@ if (!zero)
/* None of them matched. */
set_jumps(emptymatch, LABEL());
-decrease_call_count(common);
+count_match(common);
return cc + 1 + LINK_SIZE;
}
-static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
+static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
{
int class_len;
@@ -6219,7 +7026,7 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
}
else
{
- SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
+ SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
*type = *opcode;
cc++;
class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
@@ -6230,18 +7037,24 @@ else
if (end != NULL)
*end = cc + class_len;
}
+ else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
+ {
+ *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
+ if (end != NULL)
+ *end = cc + class_len;
+ }
else
{
- SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
- *arg1 = GET2(cc, (class_len + IMM2_SIZE));
- *arg2 = GET2(cc, class_len);
+ SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
+ *max = GET2(cc, (class_len + IMM2_SIZE));
+ *min = GET2(cc, class_len);
- if (*arg2 == 0)
+ if (*min == 0)
{
- SLJIT_ASSERT(*arg1 != 0);
- *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
+ SLJIT_ASSERT(*max != 0);
+ *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
}
- if (*arg1 == *arg2)
+ if (*max == *min)
*opcode = OP_EXACT;
if (end != NULL)
@@ -6252,7 +7065,7 @@ else
if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
{
- *arg1 = GET2(cc, 0);
+ *max = GET2(cc, 0);
cc += IMM2_SIZE;
}
@@ -6281,7 +7094,7 @@ DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
pcre_uchar type;
-int arg1 = -1, arg2 = -1;
+int max = -1, min = -1;
pcre_uchar* end;
jump_list *nomatch = NULL;
struct sljit_jump *jump = NULL;
@@ -6294,9 +7107,9 @@ int tmp_base, tmp_offset;
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
-switch (type)
+switch(type)
{
case OP_NOT_DIGIT:
case OP_DIGIT:
@@ -6365,10 +7178,10 @@ switch(opcode)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- if (opcode == OP_CRRANGE && arg2 > 0)
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
- if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
- jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
+ if (opcode == OP_CRRANGE && min > 0)
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
+ if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
+ jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
}
@@ -6395,7 +7208,7 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode <= OP_PLUS)
JUMPTO(SLJIT_JUMP, label);
- else if (opcode == OP_CRRANGE && arg1 == 0)
+ else if (opcode == OP_CRRANGE && max == 0)
{
OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
JUMPTO(SLJIT_JUMP, label);
@@ -6405,11 +7218,11 @@ switch(opcode)
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
}
set_jumps(nomatch, LABEL());
if (opcode == OP_CRRANGE)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
}
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
@@ -6447,7 +7260,7 @@ switch(opcode)
break;
case OP_EXACT:
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
@@ -6460,7 +7273,7 @@ switch(opcode)
if (opcode == OP_POSPLUS)
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
if (opcode == OP_POSUPTO)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &nomatch);
@@ -6484,12 +7297,40 @@ switch(opcode)
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
break;
+ case OP_CRPOSRANGE:
+ /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+ if (max != 0)
+ {
+ SLJIT_ASSERT(max - min > 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
+ }
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &nomatch);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ if (max == 0)
+ JUMPTO(SLJIT_JUMP, label);
+ else
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, label);
+ }
+ set_jumps(nomatch, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+ break;
+
default:
SLJIT_ASSERT_STOP();
break;
}
-decrease_call_count(common);
+count_match(common);
return end;
}
@@ -6498,7 +7339,7 @@ static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common
DEFINE_COMPILER;
backtrack_common *backtrack;
-PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
if (*cc == OP_FAIL)
{
@@ -6509,30 +7350,30 @@ if (*cc == OP_FAIL)
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
{
/* No need to check notempty conditions. */
- if (common->acceptlabel == NULL)
+ if (common->accept_label == NULL)
add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
else
- JUMPTO(SLJIT_JUMP, common->acceptlabel);
+ JUMPTO(SLJIT_JUMP, common->accept_label);
return cc + 1;
}
-if (common->acceptlabel == NULL)
+if (common->accept_label == NULL)
add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
else
- CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
-if (common->acceptlabel == NULL)
+if (common->accept_label == NULL)
add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
else
- CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
+ CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
-if (common->acceptlabel == NULL)
+if (common->accept_label == NULL)
add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
else
- CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
return cc + 1;
}
@@ -6556,10 +7397,86 @@ if (!optimized_cbracket)
return cc + 1 + IMM2_SIZE;
}
+static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode = *cc;
+pcre_uchar *ccend = cc + 1;
+
+if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
+ ccend += 2 + cc[1];
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+if (opcode == OP_SKIP)
+ {
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ return ccend;
+ }
+
+if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+ }
+
+return ccend;
+}
+
+static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
+
+static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+BOOL needs_control_head;
+int size;
+
+PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+common->then_trap = BACKTRACK_AS(then_trap_backtrack);
+BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
+BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize;
+size = 3 + (size < 0 ? 0 : size);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+allocate_stack(common, size);
+if (size > 3)
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
+else
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize;
+if (size >= 0)
+ init_frame(common, cc, ccend, size - 1, 0, FALSE);
+}
+
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
+BOOL has_then_trap = FALSE;
+then_trap_backtrack *save_then_trap = NULL;
+
+SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
+
+if (common->has_then && common->then_offsets[cc - common->start] != 0)
+ {
+ SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
+ has_then_trap = TRUE;
+ save_then_trap = common->then_trap;
+ /* Tail item on backtrack. */
+ compile_then_trap_matchingpath(common, cc, ccend, parent);
+ }
while (cc < ccend)
{
@@ -6685,7 +7602,7 @@ while (cc < ccend)
case OP_CLASS:
case OP_NCLASS:
- if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
+ if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
@@ -6693,7 +7610,7 @@ while (cc < ccend)
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
- if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
+ if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
@@ -6702,16 +7619,35 @@ while (cc < ccend)
case OP_REF:
case OP_REFI:
- if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
+ if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
- cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+ {
+ compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+ cc += 1 + IMM2_SIZE;
+ }
+ break;
+
+ case OP_DNREF:
+ case OP_DNREFI:
+ if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
+ cc = compile_ref_iterator_matchingpath(common, cc, parent);
+ else
+ {
+ compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+ cc += 1 + 2 * IMM2_SIZE;
+ }
break;
case OP_RECURSE:
cc = compile_recurse_matchingpath(common, cc, parent);
break;
+ case OP_CALLOUT:
+ cc = compile_callout_matchingpath(common, cc, parent);
+ break;
+
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -6736,7 +7672,7 @@ while (cc < ccend)
}
BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
if (cc[1] > OP_ASSERTBACK_NOT)
- decrease_call_count(common);
+ count_match(common);
break;
case OP_ONCE:
@@ -6772,18 +7708,32 @@ while (cc < ccend)
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
SLJIT_ASSERT(common->mark_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
- allocate_stack(common, 1);
+ allocate_stack(common, common->has_skip_arg ? 5 : 1);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+ if (common->has_skip_arg)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+ }
cc += 1 + 2 + cc[1];
break;
+ case OP_PRUNE:
+ case OP_PRUNE_ARG:
+ case OP_SKIP:
+ case OP_SKIP_ARG:
+ case OP_THEN:
+ case OP_THEN_ARG:
case OP_COMMIT:
- PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
- cc += 1;
+ cc = compile_control_verb_matchingpath(common, cc, parent);
break;
case OP_FAIL:
@@ -6807,6 +7757,15 @@ while (cc < ccend)
if (cc == NULL)
return;
}
+
+if (has_then_trap)
+ {
+ /* Head item on backtrack. */
+ PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+ BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+ BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
+ common->then_trap = save_then_trap;
+ }
SLJIT_ASSERT(cc == ccend);
}
@@ -6831,7 +7790,7 @@ DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
-int arg1 = -1, arg2 = -1;
+int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
@@ -6840,7 +7799,7 @@ int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LO
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
switch(opcode)
{
@@ -6859,7 +7818,7 @@ switch(opcode)
else
{
if (opcode == OP_UPTO)
- arg2 = 0;
+ min = 0;
if (opcode <= OP_PLUS)
{
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
@@ -6869,7 +7828,7 @@ switch(opcode)
{
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
+ jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
}
skip_char_back(common);
@@ -6914,12 +7873,12 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
if (opcode == OP_CRMINRANGE)
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
- if (opcode == OP_CRMINRANGE && arg1 == 0)
+ if (opcode == OP_CRMINRANGE && max == 0)
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
else
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
@@ -6954,6 +7913,7 @@ switch(opcode)
case OP_EXACT:
case OP_POSPLUS:
+ case OP_CRPOSRANGE:
set_jumps(current->topbacktracks, LABEL());
break;
@@ -6968,15 +7928,18 @@ switch(opcode)
}
}
-static void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
pcre_uchar type;
-type = cc[1 + IMM2_SIZE];
+type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
+
if ((type & 0x1) == 0)
{
+ /* Maximize case. */
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
@@ -6987,14 +7950,18 @@ if ((type & 0x1) == 0)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
set_jumps(current->topbacktracks, LABEL());
-free_stack(common, 2);
+free_stack(common, ref ? 2 : 3);
}
-static void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+ compile_backtrackingpath(common, current->top);
set_jumps(current->topbacktracks, LABEL());
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+ return;
if (common->has_set_som && common->mark_ptr != 0)
{
@@ -7082,11 +8049,10 @@ if (bra == OP_BRAZERO)
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
-int opcode;
+int opcode, stacksize, count;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
-int stacksize;
-int count;
+int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
@@ -7096,10 +8062,12 @@ pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
-struct sljit_label *rminlabel = NULL;
+struct sljit_label *rmin_label = NULL;
+struct sljit_label *exact_label = NULL;
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
{
@@ -7108,8 +8076,20 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
}
opcode = *cc;
+ccbegin = bracketend(cc) - 1 - LINK_SIZE;
+ket = *ccbegin;
+if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
+ {
+ repeat_ptr = PRIVATE_DATA(ccbegin);
+ repeat_type = PRIVATE_DATA(ccbegin + 2);
+ repeat_count = PRIVATE_DATA(ccbegin + 3);
+ SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
+ if (repeat_type == OP_UPTO)
+ ket = OP_KETRMAX;
+ if (repeat_type == OP_MINUPTO)
+ ket = OP_KETRMIN;
+ }
ccbegin = cc;
-ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
@@ -7121,6 +8101,24 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
opcode = OP_ONCE;
+/* Decoding the needs_control_head in framesize. */
+if (opcode == OP_ONCE)
+ {
+ needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
+ CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
+ }
+
+if (ket != OP_KET && repeat_type != 0)
+ {
+ /* TMP1 is used in OP_KETRMIN below. */
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ free_stack(common, 1);
+ if (repeat_type == OP_UPTO)
+ OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
+ else
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+ }
+
if (ket == OP_KETRMAX)
{
if (bra == OP_BRAZERO)
@@ -7135,7 +8133,15 @@ else if (ket == OP_KETRMIN)
if (bra != OP_BRAMINZERO)
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (opcode >= OP_SBRA || opcode == OP_ONCE)
+ if (repeat_type != 0)
+ {
+ /* TMP1 was set a few lines above. */
+ CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+ /* Drop STR_PTR for non-greedy plus quantifier. */
+ if (opcode != OP_ONCE)
+ free_stack(common, 1);
+ }
+ else if (opcode >= OP_SBRA || opcode == OP_ONCE)
{
/* Checking zero-length iteration. */
if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
@@ -7145,13 +8151,16 @@ else if (ket == OP_KETRMIN)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
}
+ /* Drop STR_PTR for non-greedy plus quantifier. */
if (opcode != OP_ONCE)
free_stack(common, 1);
}
else
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
}
- rminlabel = LABEL();
+ rmin_label = LABEL();
+ if (repeat_type != 0)
+ OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
}
else if (bra == OP_BRAZERO)
{
@@ -7159,6 +8168,34 @@ else if (bra == OP_BRAZERO)
free_stack(common, 1);
brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
+else if (repeat_type == OP_EXACT)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+ exact_label = LABEL();
+ }
+
+if (offset != 0)
+ {
+ if (common->capture_last_ptr != 0)
+ {
+ SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+ free_stack(common, 3);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+ }
+ else if (common->optimized_cbracket[offset >> 1] == 0)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ free_stack(common, 2);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+ }
+ }
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
{
@@ -7255,67 +8292,63 @@ if (has_alternatives)
current->top = NULL;
current->topbacktracks = NULL;
current->nextbacktracks = NULL;
+ /* Conditional blocks always have an additional alternative, even if it is empty. */
if (*cc == OP_ALT)
{
ccprev = cc + 1 + LINK_SIZE;
cc += GET(cc, 1);
if (opcode != OP_COND && opcode != OP_SCOND)
{
- if (private_data_ptr != 0 && opcode != OP_ONCE)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ if (opcode != OP_ONCE)
+ {
+ if (private_data_ptr != 0)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ else
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ }
else
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
}
compile_matchingpath(common, ccprev, cc, current);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
}
- /* Instructions after the current alternative is succesfully matched. */
+ /* Instructions after the current alternative is successfully matched. */
/* There is a similar code in compile_bracket_matchingpath. */
if (opcode == OP_ONCE)
- {
- if (CURRENT_AS(bracket_backtrack)->u.framesize < 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
- /* TMP2 which is set here used by OP_KETRMAX below. */
- if (ket == OP_KETRMAX)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
- else if (ket == OP_KETRMIN)
- {
- /* Move the STR_PTR to the private_data_ptr. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
- }
- }
- else
- {
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_sw));
- if (ket == OP_KETRMAX)
- {
- /* TMP2 which is set here used by OP_KETRMAX below. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- }
- }
+ match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
stacksize = 0;
- if (opcode != OP_ONCE)
+ if (repeat_type == OP_MINUPTO)
+ {
+ /* We need to preserve the counter. TMP2 will be used below. */
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
stacksize++;
+ }
if (ket != OP_KET || bra != OP_BRA)
stacksize++;
+ if (offset != 0)
+ {
+ if (common->capture_last_ptr != 0)
+ stacksize++;
+ if (common->optimized_cbracket[offset >> 1] == 0)
+ stacksize += 2;
+ }
+ if (opcode != OP_ONCE)
+ stacksize++;
- if (stacksize > 0) {
- if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
- allocate_stack(common, stacksize);
- else
- {
- /* We know we have place at least for one item on the top of the stack. */
- SLJIT_ASSERT(stacksize == 1);
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- }
- }
+ if (stacksize > 0)
+ allocate_stack(common, stacksize);
stacksize = 0;
+ if (repeat_type == OP_MINUPTO)
+ {
+ /* TMP2 was set above. */
+ OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
+ stacksize++;
+ }
+
if (ket != OP_KET || bra != OP_BRA)
{
if (ket != OP_KET)
@@ -7325,14 +8358,17 @@ if (has_alternatives)
stacksize++;
}
+ if (offset != 0)
+ stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
if (opcode != OP_ONCE)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
- if (offset != 0)
+ if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
+ SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
}
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
@@ -7357,7 +8393,6 @@ if (has_alternatives)
SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
assert = CURRENT_AS(bracket_backtrack)->u.assert;
if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
-
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
@@ -7374,23 +8409,19 @@ if (has_alternatives)
if (offset != 0)
{
/* Using both tmp register is better for instruction scheduling. */
- if (common->optimized_cbracket[offset >> 1] == 0)
+ if (common->optimized_cbracket[offset >> 1] != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ free_stack(common, 2);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
- free_stack(common, 3);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
}
else
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+ free_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
}
}
else if (opcode == OP_SBRA || opcode == OP_SCOND)
@@ -7401,17 +8432,19 @@ else if (opcode == OP_SBRA || opcode == OP_SCOND)
else if (opcode == OP_ONCE)
{
cc = ccbegin + GET(ccbegin, 1);
+ stacksize = needs_control_head ? 1 : 0;
+
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
{
/* Reset head and drop saved frame. */
- stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
- free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize);
+ stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
}
else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
{
/* The STR_PTR must be released. */
- free_stack(common, 1);
+ stacksize++;
}
+ free_stack(common, stacksize);
JUMPHERE(once);
/* Restore previous private_data_ptr */
@@ -7426,11 +8459,18 @@ else if (opcode == OP_ONCE)
}
}
-if (ket == OP_KETRMAX)
+if (repeat_type == OP_EXACT)
+ {
+ OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+ CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
+ }
+else if (ket == OP_KETRMAX)
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (bra != OP_BRAZERO)
free_stack(common, 1);
+
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
if (bra == OP_BRAZERO)
{
@@ -7449,7 +8489,7 @@ else if (ket == OP_KETRMIN)
affect badly the free_stack(2) above. */
if (opcode != OP_ONCE)
free_stack(common, 1);
- CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
if (opcode == OP_ONCE)
free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
else if (bra == OP_BRAMINZERO)
@@ -7463,7 +8503,7 @@ else if (bra == OP_BRAZERO)
}
}
-static void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int offset;
@@ -7477,7 +8517,11 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+ if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+ if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
}
set_jumps(current->topbacktracks, LABEL());
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
@@ -7498,7 +8542,7 @@ if (current->topbacktracks)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
}
-static void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack backtrack;
@@ -7522,9 +8566,103 @@ else
SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
}
+static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar opcode = *current->cc;
+struct sljit_label *loop;
+struct sljit_jump *jump;
+
+if (opcode == OP_THEN || opcode == OP_THEN_ARG)
+ {
+ if (common->then_trap != NULL)
+ {
+ SLJIT_ASSERT(common->control_head_ptr != 0);
+
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
+ jump = JUMP(SLJIT_JUMP);
+
+ loop = LABEL();
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
+ JUMPHERE(jump);
+ CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
+ CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
+ add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
+ return;
+ }
+ else if (common->positive_assert)
+ {
+ add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
+ return;
+ }
+ }
+
+if (common->local_exit)
+ {
+ if (common->quit_label == NULL)
+ add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+ else
+ JUMPTO(SLJIT_JUMP, common->quit_label);
+ return;
+ }
+
+if (opcode == OP_SKIP_ARG)
+ {
+ SLJIT_ASSERT(common->control_head_ptr != 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
+ sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+ add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
+ return;
+ }
+
+if (opcode == OP_SKIP)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+else
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
+add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
+}
+
+static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+int size;
+
+if (CURRENT_AS(then_trap_backtrack)->then_trap)
+ {
+ common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
+ return;
+ }
+
+size = CURRENT_AS(then_trap_backtrack)->framesize;
+size = 3 + (size < 0 ? 0 : size);
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
+free_stack(common, size);
+jump = JUMP(SLJIT_JUMP);
+
+set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
+/* STACK_TOP is set by THEN. */
+if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
+ add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+free_stack(common, 3);
+
+JUMPHERE(jump);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
+}
+
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
+then_trap_backtrack *save_then_trap = common->then_trap;
while (current)
{
@@ -7613,6 +8751,8 @@ while (current)
case OP_REF:
case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
compile_ref_iterator_backtrackingpath(common, current);
break;
@@ -7658,31 +8798,52 @@ while (current)
break;
case OP_MARK:
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
+ if (common->has_skip_arg)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ free_stack(common, common->has_skip_arg ? 5 : 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+ if (common->has_skip_arg)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+ break;
+
+ case OP_THEN:
+ case OP_THEN_ARG:
+ case OP_PRUNE:
+ case OP_PRUNE_ARG:
+ case OP_SKIP:
+ case OP_SKIP_ARG:
+ compile_control_verb_backtrackingpath(common, current);
break;
case OP_COMMIT:
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
- if (common->quitlabel == NULL)
+ if (!common->local_exit)
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+ if (common->quit_label == NULL)
add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
else
- JUMPTO(SLJIT_JUMP, common->quitlabel);
+ JUMPTO(SLJIT_JUMP, common->quit_label);
break;
+ case OP_CALLOUT:
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
set_jumps(current->topbacktracks, LABEL());
break;
+ case OP_THEN_TRAP:
+ /* A virtual opcode for then traps. */
+ compile_then_trap_backtrackingpath(common, current);
+ break;
+
default:
SLJIT_ASSERT_STOP();
break;
}
current = current->prev;
}
+common->then_trap = save_then_trap;
}
static SLJIT_INLINE void compile_recurse(compiler_common *common)
@@ -7691,39 +8852,43 @@ DEFINE_COMPILER;
pcre_uchar *cc = common->start + common->currententry->start;
pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
pcre_uchar *ccend = bracketend(cc);
-int private_data_size = get_private_data_length_for_copy(common, ccbegin, ccend);
-int framesize = get_framesize(common, cc, TRUE);
+BOOL needs_control_head;
+int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
+int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
int alternativesize;
-BOOL needsframe;
+BOOL needs_frame;
backtrack_common altbacktrack;
-struct sljit_label *save_quitlabel = common->quitlabel;
-jump_list *save_quit = common->quit;
struct sljit_jump *jump;
+/* Recurse captures then. */
+common->then_trap = NULL;
+
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
-needsframe = framesize >= 0;
-if (!needsframe)
+needs_frame = framesize >= 0;
+if (!needs_frame)
framesize = 0;
alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
-SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
+SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);
sljit_emit_fast_enter(compiler, TMP2, 0);
allocate_stack(common, private_data_size + framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
-copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
-if (needsframe)
- init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE);
+copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+if (needs_control_head)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
+if (needs_frame)
+ init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
if (alternativesize > 0)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
memset(&altbacktrack, 0, sizeof(backtrack_common));
-common->quitlabel = NULL;
-common->acceptlabel = NULL;
+common->quit_label = NULL;
+common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
@@ -7738,21 +8903,13 @@ while (1)
compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
- common->quitlabel = save_quitlabel;
- common->quit = save_quit;
return;
- }
add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
- common->quitlabel = save_quitlabel;
- common->quit = save_quit;
return;
- }
set_jumps(altbacktrack.topbacktracks, LABEL());
if (*cc != OP_ALT)
@@ -7761,16 +8918,29 @@ while (1)
altbacktrack.cc = cc + 1 + LINK_SIZE;
cc += GET(cc, 1);
}
-/* None of them matched. */
-if (common->quit != NULL)
- set_jumps(common->quit, LABEL());
+/* None of them matched. */
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_JUMP);
+if (common->quit != NULL)
+ {
+ set_jumps(common->quit, LABEL());
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+ if (needs_frame)
+ {
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+ add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+ }
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+ common->quit = NULL;
+ add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+ }
+
set_jumps(common->accept, LABEL());
-OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head);
-if (needsframe)
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+if (needs_frame)
{
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
@@ -7779,15 +8949,25 @@ if (needsframe)
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
JUMPHERE(jump);
-copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize);
+if (common->quit != NULL)
+ set_jumps(common->quit, LABEL());
+copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
free_stack(common, private_data_size + framesize + alternativesize);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
-OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0);
+if (needs_control_head)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+ }
+else
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
+ }
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
-
-common->quitlabel = save_quitlabel;
-common->quit = save_quit;
}
#undef COMPILE_BACKTRACKINGPATH
@@ -7807,12 +8987,16 @@ pcre_uchar *ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
-struct sljit_label *mainloop = NULL;
-struct sljit_label *empty_match_found;
-struct sljit_label *empty_match_backtrack;
+struct sljit_label *mainloop_label = NULL;
+struct sljit_label *continue_match_label;
+struct sljit_label *empty_match_found_label;
+struct sljit_label *empty_match_backtrack_label;
+struct sljit_label *reset_match_label;
struct sljit_jump *jump;
+struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match;
+struct sljit_label *quit_label;
SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
study = extra->study_data;
@@ -7863,7 +9047,7 @@ else
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->digits[0] = -2;
-common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset);
+common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
@@ -7877,15 +9061,22 @@ common->use_ucp = (re->options & PCRE_UCP) != 0;
ccend = bracketend(rootbacktrack.cc);
/* Calculate the local space size on the stack. */
-common->ovector_start = CALL_LIMIT + sizeof(sljit_sw);
+common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
if (!common->optimized_cbracket)
return;
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
+memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
+#endif
SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
-private_data_size = get_private_data_length(common, rootbacktrack.cc, ccend);
-if (private_data_size < 0)
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
+common->capture_last_ptr = common->ovector_start;
+common->ovector_start += sizeof(sljit_sw);
+#endif
+if (!check_opcode_types(common, rootbacktrack.cc, ccend))
{
SLJIT_FREE(common->optimized_cbracket);
return;
@@ -7904,7 +9095,12 @@ if (mode != JIT_COMPILE)
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
common->hit_start = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
+ common->ovector_start += 2 * sizeof(sljit_sw);
+ }
+ else
+ {
+ SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
+ common->needs_start_ptr = TRUE;
}
}
if ((re->options & PCRE_FIRSTLINE) != 0)
@@ -7912,33 +9108,74 @@ if ((re->options & PCRE_FIRSTLINE) != 0)
common->first_line_end = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+common->control_head_ptr = 1;
+#endif
+if (common->control_head_ptr != 0)
+ {
+ common->control_head_ptr = common->ovector_start;
+ common->ovector_start += sizeof(sljit_sw);
+ }
+if (common->needs_start_ptr && common->has_set_som)
+ {
+ /* Saving the real start pointer is necessary. */
+ common->start_ptr = common->ovector_start;
+ common->ovector_start += sizeof(sljit_sw);
+ }
+else
+ common->needs_start_ptr = FALSE;
/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
common->ovector_start += sizeof(sljit_sw);
+if (common->start_ptr == 0)
+ common->start_ptr = OVECTOR(0);
+
+/* Capturing brackets cannot be optimized if callouts are allowed. */
+if (common->capture_last_ptr != 0)
+ memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
-common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
-private_data_size += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_sw);
-if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
+common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
+
+common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si));
+if (!common->private_data_ptrs)
{
SLJIT_FREE(common->optimized_cbracket);
return;
}
-common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(int));
-if (!common->private_data_ptrs)
+memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
+
+private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
+set_private_data_ptrs(common, &private_data_size, ccend);
+if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
{
+ SLJIT_FREE(common->private_data_ptrs);
SLJIT_FREE(common->optimized_cbracket);
return;
}
-memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
-set_private_data_ptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_sw), ccend);
+
+if (common->has_then)
+ {
+ common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc);
+ if (!common->then_offsets)
+ {
+ SLJIT_FREE(common->optimized_cbracket);
+ SLJIT_FREE(common->private_data_ptrs);
+ return;
+ }
+ memset(common->then_offsets, 0, ccend - rootbacktrack.cc);
+ set_then_offsets(common, rootbacktrack.cc, NULL);
+ }
compiler = sljit_create_compiler();
if (!compiler)
{
SLJIT_FREE(common->optimized_cbracket);
SLJIT_FREE(common->private_data_ptrs);
+ if (common->has_then)
+ SLJIT_FREE(common->then_offsets);
return;
}
common->compiler = compiler;
@@ -7956,18 +9193,23 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
-OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
+OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
if (mode == JIT_PARTIAL_SOFT_COMPILE)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
/* Main part of the matching */
if ((re->options & PCRE_ANCHORED) == 0)
{
- mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+ mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+ continue_match_label = LABEL();
/* Forward search if possible. */
if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
{
@@ -7981,20 +9223,39 @@ if ((re->options & PCRE_ANCHORED) == 0)
fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
}
}
+else
+ continue_match_label = LABEL();
+
+if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
+ minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
+ }
if (common->req_char_ptr != 0)
reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
-OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
-if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
+if (common->capture_last_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
+
+if (common->needs_start_ptr)
+ {
+ SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
+ }
+else
+ SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
+
/* Copy the beginning of the string. */
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
- jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+ jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
JUMPHERE(jump);
}
else if (mode == JIT_PARTIAL_HARD_COMPILE)
@@ -8006,46 +9267,55 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket);
SLJIT_FREE(common->private_data_ptrs);
+ if (common->has_then)
+ SLJIT_FREE(common->then_offsets);
return;
}
empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
-empty_match_found = LABEL();
+empty_match_found_label = LABEL();
-common->acceptlabel = LABEL();
+common->accept_label = LABEL();
if (common->accept != NULL)
- set_jumps(common->accept, common->acceptlabel);
+ set_jumps(common->accept, common->accept_label);
/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
-common->quitlabel = LABEL();
+common->quit_label = common->forced_quit_label = LABEL();
if (common->quit != NULL)
- set_jumps(common->quit, common->quitlabel);
+ set_jumps(common->quit, common->quit_label);
+if (common->forced_quit != NULL)
+ set_jumps(common->forced_quit, common->forced_quit_label);
+if (minlength_check_failed != NULL)
+ SET_LABEL(minlength_check_failed, common->forced_quit_label);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
if (mode != JIT_COMPILE)
{
common->partialmatchlabel = LABEL();
set_jumps(common->partialmatch, common->partialmatchlabel);
- return_with_partial_match(common, common->quitlabel);
+ return_with_partial_match(common, common->quit_label);
}
-empty_match_backtrack = LABEL();
+empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket);
SLJIT_FREE(common->private_data_ptrs);
+ if (common->has_then)
+ SLJIT_FREE(common->then_offsets);
return;
}
SLJIT_ASSERT(rootbacktrack.prev == NULL);
+reset_match_label = LABEL();
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
/* Update hit_start only in the first time. */
- jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+ jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
@@ -8053,35 +9323,20 @@ if (mode == JIT_PARTIAL_SOFT_COMPILE)
}
/* Check we have remaining characters. */
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
+ {
+ SLJIT_ASSERT(common->first_line_end != 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+ }
+
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
if ((re->options & PCRE_ANCHORED) == 0)
{
if ((re->options & PCRE_FIRSTLINE) == 0)
- {
- if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
- {
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
- CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
- }
- else
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
- }
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
else
- {
- SLJIT_ASSERT(common->first_line_end != 0);
- if (mode == JIT_COMPILE && study != NULL && study->minlength > 1 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
- {
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
- JUMPTO(SLJIT_C_ZERO, mainloop);
- }
- else
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, mainloop);
- }
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
}
/* No more remaining characters. */
@@ -8089,24 +9344,26 @@ if (reqbyte_notfound != NULL)
JUMPHERE(reqbyte_notfound);
if (mode == JIT_PARTIAL_SOFT_COMPILE)
- CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0, common->partialmatchlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
-JUMPTO(SLJIT_JUMP, common->quitlabel);
+JUMPTO(SLJIT_JUMP, common->quit_label);
flush_stubs(common);
JUMPHERE(empty_match);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
-CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack);
+CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
-CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
+CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
-CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
-JUMPTO(SLJIT_JUMP, empty_match_backtrack);
+CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
+JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
common->currententry = common->entries;
+common->local_exit = TRUE;
+quit_label = common->quit_label;
while (common->currententry != NULL)
{
/* Might add new entries. */
@@ -8116,11 +9373,15 @@ while (common->currententry != NULL)
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket);
SLJIT_FREE(common->private_data_ptrs);
+ if (common->has_then)
+ SLJIT_FREE(common->then_offsets);
return;
}
flush_stubs(common);
common->currententry = common->currententry->next;
}
+common->local_exit = FALSE;
+common->quit_label = quit_label;
/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
@@ -8146,12 +9407,12 @@ sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
-JUMPTO(SLJIT_JUMP, common->quitlabel);
+JUMPTO(SLJIT_JUMP, common->quit_label);
/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
-JUMPTO(SLJIT_JUMP, common->quitlabel);
+JUMPTO(SLJIT_JUMP, common->quit_label);
if (common->revertframes != NULL)
{
@@ -8188,6 +9449,14 @@ if (common->caselesscmp != NULL)
set_jumps(common->caselesscmp, LABEL());
do_caselesscmp(common);
}
+if (common->reset_match != NULL)
+ {
+ set_jumps(common->reset_match, LABEL());
+ do_reset_match(common, (re->top_bracket + 1) * 2);
+ CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+ JUMPTO(SLJIT_JUMP, reset_match_label);
+ }
#ifdef SUPPORT_UTF
#ifndef COMPILE_PCRE32
if (common->utfreadchar != NULL)
@@ -8214,6 +9483,9 @@ if (common->getucd != NULL)
SLJIT_FREE(common->optimized_cbracket);
SLJIT_FREE(common->private_data_ptrs);
+if (common->has_then)
+ SLJIT_FREE(common->then_offsets);
+
executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
sljit_free_compiler(compiler);
@@ -8244,6 +9516,7 @@ else
}
memset(functions, 0, sizeof(executable_functions));
functions->top_bracket = (re->top_bracket + 1) * 2;
+ functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
extra->executable_jit = functions;
extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
}
@@ -8272,7 +9545,7 @@ return convert_executable_func.call_executable_func(arguments);
int
PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
- int length, int start_offset, int options, int *offsets, int offsetcount)
+ int length, int start_offset, int options, int *offsets, int offset_count)
{
executable_functions *functions = (executable_functions *)extra_data->executable_jit;
union {
@@ -8280,7 +9553,7 @@ union {
jit_function call_executable_func;
} convert_executable_func;
jit_arguments arguments;
-int maxoffsetcount;
+int max_offset_count;
int retval;
int mode = JIT_COMPILE;
@@ -8298,25 +9571,29 @@ arguments.begin = subject;
arguments.end = subject + length;
arguments.mark_ptr = NULL;
/* JIT decreases this value less frequently than the interpreter. */
-arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit;
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
+ arguments.limit_match = functions->limit_match;
arguments.notbol = (options & PCRE_NOTBOL) != 0;
arguments.noteol = (options & PCRE_NOTEOL) != 0;
arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
arguments.offsets = offsets;
+arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
+arguments.real_offset_count = offset_count;
-/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
the output vector for storing captured strings, with the remainder used as
workspace. We don't need the workspace here. For compatibility, we limit the
number of captured strings in the same way as pcre_exec(), so that the user
gets the same result with and without JIT. */
-if (offsetcount != 2)
- offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
-maxoffsetcount = functions->top_bracket;
-if (offsetcount > maxoffsetcount)
- offsetcount = maxoffsetcount;
-arguments.offsetcount = offsetcount;
+if (offset_count != 2)
+ offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
+max_offset_count = functions->top_bracket;
+if (offset_count > max_offset_count)
+ offset_count = max_offset_count;
+arguments.offset_count = offset_count;
if (functions->callback)
arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
@@ -8331,7 +9608,7 @@ else
retval = convert_executable_func.call_executable_func(&arguments);
}
-if (retval * 2 > offsetcount)
+if (retval * 2 > offset_count)
retval = 0;
if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = arguments.mark_ptr;
@@ -8343,17 +9620,17 @@ return retval;
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options,
- int *offsets, int offsetcount, pcre_jit_stack *stack)
+ int *offsets, int offset_count, pcre_jit_stack *stack)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options,
- int *offsets, int offsetcount, pcre16_jit_stack *stack)
+ int *offsets, int offset_count, pcre16_jit_stack *stack)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
PCRE_SPTR32 subject, int length, int start_offset, int options,
- int *offsets, int offsetcount, pcre32_jit_stack *stack)
+ int *offsets, int offset_count, pcre32_jit_stack *stack)
#endif
{
pcre_uchar *subject_ptr = (pcre_uchar *)subject;
@@ -8363,7 +9640,7 @@ union {
jit_function call_executable_func;
} convert_executable_func;
jit_arguments arguments;
-int maxoffsetcount;
+int max_offset_count;
int retval;
int mode = JIT_COMPILE;
@@ -8387,30 +9664,34 @@ arguments.begin = subject_ptr;
arguments.end = subject_ptr + length;
arguments.mark_ptr = NULL;
/* JIT decreases this value less frequently than the interpreter. */
-arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit;
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
+ arguments.limit_match = functions->limit_match;
arguments.notbol = (options & PCRE_NOTBOL) != 0;
arguments.noteol = (options & PCRE_NOTEOL) != 0;
arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
arguments.offsets = offsets;
+arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
+arguments.real_offset_count = offset_count;
-/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
the output vector for storing captured strings, with the remainder used as
workspace. We don't need the workspace here. For compatibility, we limit the
number of captured strings in the same way as pcre_exec(), so that the user
gets the same result with and without JIT. */
-if (offsetcount != 2)
- offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
-maxoffsetcount = functions->top_bracket;
-if (offsetcount > maxoffsetcount)
- offsetcount = maxoffsetcount;
-arguments.offsetcount = offsetcount;
+if (offset_count != 2)
+ offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
+max_offset_count = functions->top_bracket;
+if (offset_count > max_offset_count)
+ offset_count = max_offset_count;
+arguments.offset_count = offset_count;
convert_executable_func.executable_func = functions->executable_funcs[mode];
retval = convert_executable_func.call_executable_func(&arguments);
-if (retval * 2 > offsetcount)
+if (retval * 2 > offset_count)
retval = 0;
if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = arguments.mark_ptr;
@@ -8504,6 +9785,20 @@ if (extra != NULL &&
}
}
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{
+sljit_free_unused_memory_exec();
+}
+
#else /* SUPPORT_JIT */
/* These are dummy functions to avoid linking errors when JIT support is not
@@ -8555,6 +9850,19 @@ pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void
(void)userdata;
}
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{
+}
+
#endif
/* End of pcre_jit_compile.c */
diff --git a/src/3rdparty/pcre/pcre_maketables.c b/src/3rdparty/pcre/pcre_maketables.c
index de1ea65cf6..17a2625fa0 100644
--- a/src/3rdparty/pcre/pcre_maketables.c
+++ b/src/3rdparty/pcre/pcre_maketables.c
@@ -98,13 +98,17 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
-exclusive ones - in some locales things may be different. Note that the table
-for "space" includes everything "isspace" gives, including VT in the default
-locale. This makes it work for the POSIX class [:space:]. Note also that it is
-possible for a character to be alnum or alpha without being lower or upper,
-such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
-least under Debian Linux's locales as of 12/2005). So we must test for alnum
-specially. */
+exclusive ones - in some locales things may be different.
+
+Note that the table for "space" includes everything "isspace" gives, including
+VT in the default locale. This makes it work for the POSIX class [:space:].
+From release 8.34 is is also correct for Perl space, because Perl added VT at
+release 5.18.
+
+Note also that it is possible for a character to be alnum or alpha without
+being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
+fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
+test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
@@ -123,14 +127,15 @@ for (i = 0; i < 256; i++)
}
p += cbit_length;
-/* Finally, the character type table. In this, we exclude VT from the white
-space chars, because Perl doesn't recognize it as such for \s and for comments
-within regexes. */
+/* Finally, the character type table. In this, we used to exclude VT from the
+white space chars, because Perl didn't recognize it as such for \s and for
+comments within regexes. However, Perl changed at release 5.18, so PCRE changed
+at release 8.34. */
for (i = 0; i < 256; i++)
{
int x = 0;
- if (i != CHAR_VT && isspace(i)) x += ctype_space;
+ if (isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
diff --git a/src/3rdparty/pcre/pcre_string_utils.c b/src/3rdparty/pcre/pcre_string_utils.c
index a9b4e77c00..2601ce385a 100644
--- a/src/3rdparty/pcre/pcre_string_utils.c
+++ b/src/3rdparty/pcre/pcre_string_utils.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,8 @@ POSSIBILITY OF SUCH DAMAGE.
*/
-/* This module contains an internal function that is used to match an extended
-class. It is used by both pcre_exec() and pcre_def_exec(). */
+/* This module contains internal functions for comparing and finding the length
+of strings for different data item sizes. */
#ifdef PCRE_HAVE_CONFIG_H
@@ -54,7 +54,7 @@ class. It is used by both pcre_exec() and pcre_def_exec(). */
* Compare string utilities *
*************************************************/
-/* The following two functions compares two strings. Basically an strcmp
+/* The following two functions compares two strings. Basically a strcmp
for non 8 bit characters.
Arguments:
diff --git a/src/3rdparty/pcre/pcre_study.c b/src/3rdparty/pcre/pcre_study.c
index 6040e4dbdc..0478a00c1a 100644
--- a/src/3rdparty/pcre/pcre_study.c
+++ b/src/3rdparty/pcre/pcre_study.c
@@ -66,8 +66,9 @@ string of that length that matches. In UTF8 mode, the result is in characters
rather than bytes.
Arguments:
+ re compiled pattern block
code pointer to start of group (the bracket)
- startcode pointer to start of the whole pattern
+ startcode pointer to start of the whole pattern's code
options the compiling options
int RECURSE depth
@@ -78,8 +79,8 @@ Returns: the minimum length
*/
static int
-find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
- int recurse_depth)
+find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
+ const pcre_uchar *startcode, int options, int recurse_depth)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -129,7 +130,7 @@ for (;;)
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
- d = find_minlength(cc, startcode, options, recurse_depth);
+ d = find_minlength(re, cc, startcode, options, recurse_depth);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -175,9 +176,9 @@ for (;;)
case OP_REVERSE:
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
case OP_CALLOUT:
case OP_SOD:
@@ -341,6 +342,7 @@ for (;;)
{
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
branchlength++;
/* Fall through */
@@ -348,11 +350,14 @@ for (;;)
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
branchlength += GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
break;
@@ -375,7 +380,38 @@ for (;;)
matches an empty string (by default it causes a matching failure), so in
that case we must set the minimum length to zero. */
- case OP_REF:
+ case OP_DNREF: /* Duplicate named pattern back reference */
+ case OP_DNREFI:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
+ {
+ int count = GET2(cc, 1+IMM2_SIZE);
+ pcre_uchar *slot = (pcre_uchar *)re +
+ re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
+ d = INT_MAX;
+ while (count-- > 0)
+ {
+ ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
+ if (cs == NULL) return -2;
+ do ce += GET(ce, 1); while (*ce == OP_ALT);
+ if (cc > cs && cc < ce)
+ {
+ d = 0;
+ had_recurse = TRUE;
+ break;
+ }
+ else
+ {
+ int dd = find_minlength(re, cs, startcode, options, recurse_depth);
+ if (dd < d) d = dd;
+ }
+ slot += re->name_entry_size;
+ }
+ }
+ else d = 0;
+ cc += 1 + 2*IMM2_SIZE;
+ goto REPEAT_BACK_REFERENCE;
+
+ case OP_REF: /* Single back reference */
case OP_REFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
@@ -389,7 +425,7 @@ for (;;)
}
else
{
- d = find_minlength(cs, startcode, options, recurse_depth);
+ d = find_minlength(re, cs, startcode, options, recurse_depth);
}
}
else d = 0;
@@ -397,24 +433,29 @@ for (;;)
/* Handle repeated back references */
+ REPEAT_BACK_REFERENCE:
switch (*cc)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
min = 0;
cc++;
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
min = 1;
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
min = GET2(cc, 1);
cc += 1 + 2 * IMM2_SIZE;
break;
@@ -437,7 +478,8 @@ for (;;)
had_recurse = TRUE;
else
{
- branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
+ branchlength += find_minlength(re, cs, startcode, options,
+ recurse_depth + 1);
}
cc += 1 + LINK_SIZE;
break;
@@ -778,6 +820,10 @@ do
case OP_COND:
case OP_CREF:
case OP_DEF:
+ case OP_DNCREF:
+ case OP_DNREF:
+ case OP_DNREFI:
+ case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_END:
@@ -786,7 +832,6 @@ do
case OP_EXTUNI:
case OP_FAIL:
case OP_MARK:
- case OP_NCREF:
case OP_NOT:
case OP_NOTEXACT:
case OP_NOTEXACTI:
@@ -818,7 +863,6 @@ do
case OP_NOTUPTOI:
case OP_NOT_HSPACE:
case OP_NOT_VSPACE:
- case OP_NRREF:
case OP_PROP:
case OP_PRUNE:
case OP_PRUNE_ARG:
@@ -1183,24 +1227,16 @@ do
set_type_bits(start_bits, cbit_digit, table_limit, cd);
break;
- /* The cbit_space table has vertical tab as whitespace; we have to
- ensure it gets set as not whitespace. Luckily, the code value is the
- same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate
- bit. */
+ /* The cbit_space table has vertical tab as whitespace; we no longer
+ have to play fancy tricks because Perl added VT to its whitespace at
+ release 5.18. PCRE added it at release 8.34. */
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
- start_bits[1] |= 0x08;
break;
- /* The cbit_space table has vertical tab as whitespace; we have to
- avoid setting it. Luckily, the code value is the same (0x0b) in ASCII
- and EBCDIC, so we can just adjust the appropriate bit. */
-
case OP_WHITESPACE:
- c = start_bits[1]; /* Save in case it was already set */
set_type_bits(start_bits, cbit_space, table_limit, cd);
- start_bits[1] = (start_bits[1] & ~0x08) | c;
break;
case OP_NOT_WORDCHAR:
@@ -1277,11 +1313,14 @@ do
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
tcode++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
else try_next = FALSE;
break;
@@ -1346,6 +1385,7 @@ pcre_uchar *code;
compile_data compile_block;
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
+
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
@@ -1422,7 +1462,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* Find the minimum length of subject string. */
-switch(min = find_minlength(code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
diff --git a/src/3rdparty/pcre/pcre_tables.c b/src/3rdparty/pcre/pcre_tables.c
index a50b87371c..c5e1d8059b 100644
--- a/src/3rdparty/pcre/pcre_tables.c
+++ b/src/3rdparty/pcre/pcre_tables.c
@@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
+#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
@@ -493,6 +494,7 @@ const char PRIV(utt_names)[] =
STRING_Xan0
STRING_Xps0
STRING_Xsp0
+ STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
@@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
- { 1023, PT_WORD, 0 },
- { 1027, PT_SC, ucp_Yi },
- { 1030, PT_GC, ucp_Z },
- { 1032, PT_PC, ucp_Zl },
- { 1035, PT_PC, ucp_Zp },
- { 1038, PT_PC, ucp_Zs }
+ { 1023, PT_UCNC, 0 },
+ { 1027, PT_WORD, 0 },
+ { 1031, PT_SC, ucp_Yi },
+ { 1034, PT_GC, ucp_Z },
+ { 1036, PT_PC, ucp_Zl },
+ { 1039, PT_PC, ucp_Zp },
+ { 1042, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/src/3rdparty/pcre/pcre_ucd.c b/src/3rdparty/pcre/pcre_ucd.c
index 003f04d3d7..33041821fc 100644
--- a/src/3rdparty/pcre/pcre_ucd.c
+++ b/src/3rdparty/pcre/pcre_ucd.c
@@ -20,7 +20,7 @@ needed. */
/* Unicode character database. */
/* This file was autogenerated by the MultiStage2.py script. */
-/* Total size: 65696 bytes, block size: 128. */
+/* Total size: 65688 bytes, block size: 128. */
/* The tables herein are needed only when UCP support is built
into PCRE. This module should not be referenced otherwise, so
@@ -79,7 +79,7 @@ const pcre_uint32 PRIV(ucd_caseless_sets)[] = {
#ifndef PCRE_INCLUDED
-const ucd_record PRIV(ucd_records)[] = { /* 5024 bytes, record size 8 */
+const ucd_record PRIV(ucd_records)[] = { /* 5016 bytes, record size 8 */
{ 9, 0, 2, 0, 0, }, /* 0 */
{ 9, 0, 1, 0, 0, }, /* 1 */
{ 9, 0, 0, 0, 0, }, /* 2 */
@@ -422,7 +422,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 5024 bytes, record size 8 */
{ 37, 21, 12, 0, 0, }, /* 339 */
{ 37, 17, 12, 0, 0, }, /* 340 */
{ 37, 12, 3, 0, 0, }, /* 341 */
- { 37, 29, 12, 0, 0, }, /* 342 */
+ { 37, 1, 2, 0, 0, }, /* 342 */
{ 37, 13, 12, 0, 0, }, /* 343 */
{ 37, 7, 12, 0, 0, }, /* 344 */
{ 37, 6, 12, 0, 0, }, /* 345 */
@@ -598,116 +598,115 @@ const ucd_record PRIV(ucd_records)[] = { /* 5024 bytes, record size 8 */
{ 83, 10, 5, 0, 0, }, /* 515 */
{ 83, 7, 12, 0, 0, }, /* 516 */
{ 83, 21, 12, 0, 0, }, /* 517 */
- { 83, 6, 12, 0, 0, }, /* 518 */
- { 83, 13, 12, 0, 0, }, /* 519 */
- { 67, 7, 12, 0, 0, }, /* 520 */
- { 67, 12, 3, 0, 0, }, /* 521 */
- { 67, 10, 5, 0, 0, }, /* 522 */
- { 67, 13, 12, 0, 0, }, /* 523 */
- { 67, 21, 12, 0, 0, }, /* 524 */
- { 38, 6, 12, 0, 0, }, /* 525 */
- { 91, 7, 12, 0, 0, }, /* 526 */
- { 91, 12, 3, 0, 0, }, /* 527 */
- { 91, 6, 12, 0, 0, }, /* 528 */
- { 91, 21, 12, 0, 0, }, /* 529 */
- { 86, 7, 12, 0, 0, }, /* 530 */
- { 86, 10, 5, 0, 0, }, /* 531 */
- { 86, 12, 3, 0, 0, }, /* 532 */
- { 86, 21, 12, 0, 0, }, /* 533 */
- { 86, 6, 12, 0, 0, }, /* 534 */
- { 86, 13, 12, 0, 0, }, /* 535 */
- { 23, 7, 9, 0, 0, }, /* 536 */
- { 23, 7, 10, 0, 0, }, /* 537 */
- { 9, 4, 2, 0, 0, }, /* 538 */
- { 9, 3, 12, 0, 0, }, /* 539 */
- { 25, 25, 12, 0, 0, }, /* 540 */
- { 0, 24, 12, 0, 0, }, /* 541 */
- { 9, 6, 3, 0, 0, }, /* 542 */
- { 35, 7, 12, 0, 0, }, /* 543 */
- { 19, 14, 12, 0, 0, }, /* 544 */
- { 19, 15, 12, 0, 0, }, /* 545 */
- { 19, 26, 12, 0, 0, }, /* 546 */
- { 70, 7, 12, 0, 0, }, /* 547 */
- { 66, 7, 12, 0, 0, }, /* 548 */
- { 41, 7, 12, 0, 0, }, /* 549 */
- { 41, 15, 12, 0, 0, }, /* 550 */
- { 18, 7, 12, 0, 0, }, /* 551 */
- { 18, 14, 12, 0, 0, }, /* 552 */
- { 59, 7, 12, 0, 0, }, /* 553 */
- { 59, 21, 12, 0, 0, }, /* 554 */
- { 42, 7, 12, 0, 0, }, /* 555 */
- { 42, 21, 12, 0, 0, }, /* 556 */
- { 42, 14, 12, 0, 0, }, /* 557 */
- { 13, 9, 12, 0, 40, }, /* 558 */
- { 13, 5, 12, 0, -40, }, /* 559 */
- { 46, 7, 12, 0, 0, }, /* 560 */
- { 44, 7, 12, 0, 0, }, /* 561 */
- { 44, 13, 12, 0, 0, }, /* 562 */
- { 11, 7, 12, 0, 0, }, /* 563 */
- { 80, 7, 12, 0, 0, }, /* 564 */
- { 80, 21, 12, 0, 0, }, /* 565 */
- { 80, 15, 12, 0, 0, }, /* 566 */
- { 65, 7, 12, 0, 0, }, /* 567 */
- { 65, 15, 12, 0, 0, }, /* 568 */
- { 65, 21, 12, 0, 0, }, /* 569 */
- { 71, 7, 12, 0, 0, }, /* 570 */
- { 71, 21, 12, 0, 0, }, /* 571 */
- { 97, 7, 12, 0, 0, }, /* 572 */
- { 96, 7, 12, 0, 0, }, /* 573 */
- { 30, 7, 12, 0, 0, }, /* 574 */
- { 30, 12, 3, 0, 0, }, /* 575 */
- { 30, 15, 12, 0, 0, }, /* 576 */
- { 30, 21, 12, 0, 0, }, /* 577 */
- { 87, 7, 12, 0, 0, }, /* 578 */
- { 87, 15, 12, 0, 0, }, /* 579 */
- { 87, 21, 12, 0, 0, }, /* 580 */
- { 77, 7, 12, 0, 0, }, /* 581 */
- { 77, 21, 12, 0, 0, }, /* 582 */
- { 82, 7, 12, 0, 0, }, /* 583 */
- { 82, 15, 12, 0, 0, }, /* 584 */
- { 81, 7, 12, 0, 0, }, /* 585 */
- { 81, 15, 12, 0, 0, }, /* 586 */
- { 88, 7, 12, 0, 0, }, /* 587 */
- { 0, 15, 12, 0, 0, }, /* 588 */
- { 93, 10, 5, 0, 0, }, /* 589 */
- { 93, 12, 3, 0, 0, }, /* 590 */
- { 93, 7, 12, 0, 0, }, /* 591 */
- { 93, 21, 12, 0, 0, }, /* 592 */
- { 93, 15, 12, 0, 0, }, /* 593 */
- { 93, 13, 12, 0, 0, }, /* 594 */
- { 84, 12, 3, 0, 0, }, /* 595 */
- { 84, 10, 5, 0, 0, }, /* 596 */
- { 84, 7, 12, 0, 0, }, /* 597 */
- { 84, 21, 12, 0, 0, }, /* 598 */
- { 84, 1, 2, 0, 0, }, /* 599 */
- { 100, 7, 12, 0, 0, }, /* 600 */
- { 100, 13, 12, 0, 0, }, /* 601 */
- { 95, 12, 3, 0, 0, }, /* 602 */
- { 95, 7, 12, 0, 0, }, /* 603 */
- { 95, 10, 5, 0, 0, }, /* 604 */
- { 95, 13, 12, 0, 0, }, /* 605 */
- { 95, 21, 12, 0, 0, }, /* 606 */
- { 99, 12, 3, 0, 0, }, /* 607 */
- { 99, 10, 5, 0, 0, }, /* 608 */
- { 99, 7, 12, 0, 0, }, /* 609 */
- { 99, 21, 12, 0, 0, }, /* 610 */
- { 99, 13, 12, 0, 0, }, /* 611 */
- { 101, 7, 12, 0, 0, }, /* 612 */
- { 101, 12, 3, 0, 0, }, /* 613 */
- { 101, 10, 5, 0, 0, }, /* 614 */
- { 101, 13, 12, 0, 0, }, /* 615 */
- { 62, 7, 12, 0, 0, }, /* 616 */
- { 62, 14, 12, 0, 0, }, /* 617 */
- { 62, 21, 12, 0, 0, }, /* 618 */
- { 79, 7, 12, 0, 0, }, /* 619 */
- { 98, 7, 12, 0, 0, }, /* 620 */
- { 98, 10, 5, 0, 0, }, /* 621 */
- { 98, 12, 3, 0, 0, }, /* 622 */
- { 98, 6, 12, 0, 0, }, /* 623 */
- { 9, 10, 3, 0, 0, }, /* 624 */
- { 19, 12, 3, 0, 0, }, /* 625 */
- { 9, 26, 11, 0, 0, }, /* 626 */
- { 26, 26, 12, 0, 0, }, /* 627 */
+ { 83, 13, 12, 0, 0, }, /* 518 */
+ { 67, 7, 12, 0, 0, }, /* 519 */
+ { 67, 12, 3, 0, 0, }, /* 520 */
+ { 67, 10, 5, 0, 0, }, /* 521 */
+ { 67, 13, 12, 0, 0, }, /* 522 */
+ { 67, 21, 12, 0, 0, }, /* 523 */
+ { 38, 6, 12, 0, 0, }, /* 524 */
+ { 91, 7, 12, 0, 0, }, /* 525 */
+ { 91, 12, 3, 0, 0, }, /* 526 */
+ { 91, 6, 12, 0, 0, }, /* 527 */
+ { 91, 21, 12, 0, 0, }, /* 528 */
+ { 86, 7, 12, 0, 0, }, /* 529 */
+ { 86, 10, 5, 0, 0, }, /* 530 */
+ { 86, 12, 3, 0, 0, }, /* 531 */
+ { 86, 21, 12, 0, 0, }, /* 532 */
+ { 86, 6, 12, 0, 0, }, /* 533 */
+ { 86, 13, 12, 0, 0, }, /* 534 */
+ { 23, 7, 9, 0, 0, }, /* 535 */
+ { 23, 7, 10, 0, 0, }, /* 536 */
+ { 9, 4, 2, 0, 0, }, /* 537 */
+ { 9, 3, 12, 0, 0, }, /* 538 */
+ { 25, 25, 12, 0, 0, }, /* 539 */
+ { 0, 24, 12, 0, 0, }, /* 540 */
+ { 9, 6, 3, 0, 0, }, /* 541 */
+ { 35, 7, 12, 0, 0, }, /* 542 */
+ { 19, 14, 12, 0, 0, }, /* 543 */
+ { 19, 15, 12, 0, 0, }, /* 544 */
+ { 19, 26, 12, 0, 0, }, /* 545 */
+ { 70, 7, 12, 0, 0, }, /* 546 */
+ { 66, 7, 12, 0, 0, }, /* 547 */
+ { 41, 7, 12, 0, 0, }, /* 548 */
+ { 41, 15, 12, 0, 0, }, /* 549 */
+ { 18, 7, 12, 0, 0, }, /* 550 */
+ { 18, 14, 12, 0, 0, }, /* 551 */
+ { 59, 7, 12, 0, 0, }, /* 552 */
+ { 59, 21, 12, 0, 0, }, /* 553 */
+ { 42, 7, 12, 0, 0, }, /* 554 */
+ { 42, 21, 12, 0, 0, }, /* 555 */
+ { 42, 14, 12, 0, 0, }, /* 556 */
+ { 13, 9, 12, 0, 40, }, /* 557 */
+ { 13, 5, 12, 0, -40, }, /* 558 */
+ { 46, 7, 12, 0, 0, }, /* 559 */
+ { 44, 7, 12, 0, 0, }, /* 560 */
+ { 44, 13, 12, 0, 0, }, /* 561 */
+ { 11, 7, 12, 0, 0, }, /* 562 */
+ { 80, 7, 12, 0, 0, }, /* 563 */
+ { 80, 21, 12, 0, 0, }, /* 564 */
+ { 80, 15, 12, 0, 0, }, /* 565 */
+ { 65, 7, 12, 0, 0, }, /* 566 */
+ { 65, 15, 12, 0, 0, }, /* 567 */
+ { 65, 21, 12, 0, 0, }, /* 568 */
+ { 71, 7, 12, 0, 0, }, /* 569 */
+ { 71, 21, 12, 0, 0, }, /* 570 */
+ { 97, 7, 12, 0, 0, }, /* 571 */
+ { 96, 7, 12, 0, 0, }, /* 572 */
+ { 30, 7, 12, 0, 0, }, /* 573 */
+ { 30, 12, 3, 0, 0, }, /* 574 */
+ { 30, 15, 12, 0, 0, }, /* 575 */
+ { 30, 21, 12, 0, 0, }, /* 576 */
+ { 87, 7, 12, 0, 0, }, /* 577 */
+ { 87, 15, 12, 0, 0, }, /* 578 */
+ { 87, 21, 12, 0, 0, }, /* 579 */
+ { 77, 7, 12, 0, 0, }, /* 580 */
+ { 77, 21, 12, 0, 0, }, /* 581 */
+ { 82, 7, 12, 0, 0, }, /* 582 */
+ { 82, 15, 12, 0, 0, }, /* 583 */
+ { 81, 7, 12, 0, 0, }, /* 584 */
+ { 81, 15, 12, 0, 0, }, /* 585 */
+ { 88, 7, 12, 0, 0, }, /* 586 */
+ { 0, 15, 12, 0, 0, }, /* 587 */
+ { 93, 10, 5, 0, 0, }, /* 588 */
+ { 93, 12, 3, 0, 0, }, /* 589 */
+ { 93, 7, 12, 0, 0, }, /* 590 */
+ { 93, 21, 12, 0, 0, }, /* 591 */
+ { 93, 15, 12, 0, 0, }, /* 592 */
+ { 93, 13, 12, 0, 0, }, /* 593 */
+ { 84, 12, 3, 0, 0, }, /* 594 */
+ { 84, 10, 5, 0, 0, }, /* 595 */
+ { 84, 7, 12, 0, 0, }, /* 596 */
+ { 84, 21, 12, 0, 0, }, /* 597 */
+ { 84, 1, 2, 0, 0, }, /* 598 */
+ { 100, 7, 12, 0, 0, }, /* 599 */
+ { 100, 13, 12, 0, 0, }, /* 600 */
+ { 95, 12, 3, 0, 0, }, /* 601 */
+ { 95, 7, 12, 0, 0, }, /* 602 */
+ { 95, 10, 5, 0, 0, }, /* 603 */
+ { 95, 13, 12, 0, 0, }, /* 604 */
+ { 95, 21, 12, 0, 0, }, /* 605 */
+ { 99, 12, 3, 0, 0, }, /* 606 */
+ { 99, 10, 5, 0, 0, }, /* 607 */
+ { 99, 7, 12, 0, 0, }, /* 608 */
+ { 99, 21, 12, 0, 0, }, /* 609 */
+ { 99, 13, 12, 0, 0, }, /* 610 */
+ { 101, 7, 12, 0, 0, }, /* 611 */
+ { 101, 12, 3, 0, 0, }, /* 612 */
+ { 101, 10, 5, 0, 0, }, /* 613 */
+ { 101, 13, 12, 0, 0, }, /* 614 */
+ { 62, 7, 12, 0, 0, }, /* 615 */
+ { 62, 14, 12, 0, 0, }, /* 616 */
+ { 62, 21, 12, 0, 0, }, /* 617 */
+ { 79, 7, 12, 0, 0, }, /* 618 */
+ { 98, 7, 12, 0, 0, }, /* 619 */
+ { 98, 10, 5, 0, 0, }, /* 620 */
+ { 98, 12, 3, 0, 0, }, /* 621 */
+ { 98, 6, 12, 0, 0, }, /* 622 */
+ { 9, 10, 3, 0, 0, }, /* 623 */
+ { 19, 12, 3, 0, 0, }, /* 624 */
+ { 9, 26, 11, 0, 0, }, /* 625 */
+ { 26, 26, 12, 0, 0, }, /* 626 */
};
const pcre_uint8 PRIV(ucd_stage1)[] = { /* 8704 bytes */
@@ -1380,7 +1379,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
/* block 12 */
185,185,185,185,185,109,186,186,186,187,187,188, 4,187,189,189,
-190,190,190,190,190,190,190,190,190,190,190, 4,109,109,187, 4,
+190,190,190,190,190,190,190,190,190,190,190, 4,185,109,187, 4,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
102,191,191,191,191,191,191,191,191,191,191,104,104,104,104,104,
@@ -1760,7 +1759,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
/* block 50 */
360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,361,361,362,362,362,109,109,363,363,
+360,360,360,360,360,360,360,361,361,362,362,361,109,109,363,363,
364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
@@ -1885,7 +1884,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
4, 4, 4, 4, 4, 4, 4, 4, 4, 21, 25, 4, 4, 4, 4, 15,
15, 4, 4, 4, 8, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 8, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
- 22, 22, 22, 22, 22,426,426,426,426,426, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22,426, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
23,101,109,109, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,101,
/* block 63 */
@@ -1929,7 +1928,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 67 */
- 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 6, 7, 6, 7, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
8, 8, 19, 19, 19, 19, 19, 19, 19, 6, 7, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2353,30 +2352,30 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
516,516,516,514,515,515,514,514,514,514,515,515,514,515,515,515,
-515,517,517,517,517,517,517,517,517,517,517,517,517,517,109,518,
-519,519,519,519,519,519,519,519,519,519,109,109,109,109,517,517,
+515,517,517,517,517,517,517,517,517,517,517,517,517,517,109,102,
+518,518,518,518,518,518,518,518,518,518,109,109,109,109,517,517,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 110 */
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
-520,520,520,520,520,520,520,520,520,521,521,521,521,521,521,522,
-522,521,521,522,522,521,521,109,109,109,109,109,109,109,109,109,
-520,520,520,521,520,520,520,520,520,520,520,520,521,522,109,109,
-523,523,523,523,523,523,523,523,523,523,109,109,524,524,524,524,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,520,520,520,520,520,520,521,
+521,520,520,521,521,520,520,109,109,109,109,109,109,109,109,109,
+519,519,519,520,519,519,519,519,519,519,519,519,520,521,109,109,
+522,522,522,522,522,522,522,522,522,522,109,109,523,523,523,523,
295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
-525,295,295,295,295,295,295,301,301,301,295,296,109,109,109,109,
+524,295,295,295,295,295,295,301,301,301,295,296,109,109,109,109,
/* block 111 */
-526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
-526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
-526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
-527,526,527,527,527,526,526,527,527,526,526,526,526,526,527,527,
-526,527,526,109,109,109,109,109,109,109,109,109,109,109,109,109,
-109,109,109,109,109,109,109,109,109,109,109,526,526,528,529,529,
-530,530,530,530,530,530,530,530,530,530,530,531,532,532,531,531,
-533,533,530,534,534,531,532,109,109,109,109,109,109,109,109,109,
+525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,
+525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,
+525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,
+526,525,526,526,526,525,525,526,526,525,525,525,525,525,526,526,
+525,526,525,109,109,109,109,109,109,109,109,109,109,109,109,109,
+109,109,109,109,109,109,109,109,109,109,109,525,525,527,528,528,
+529,529,529,529,529,529,529,529,529,529,529,530,531,531,530,530,
+532,532,529,533,533,530,531,109,109,109,109,109,109,109,109,109,
/* block 112 */
109,308,308,308,308,308,308,109,109,308,308,308,308,308,308,109,
@@ -2393,85 +2392,85 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,531,531,532,531,531,532,531,531,533,531,532,109,109,
-535,535,535,535,535,535,535,535,535,535,109,109,109,109,109,109,
+529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,
+529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,
+529,529,529,530,530,531,530,530,531,530,530,532,530,531,109,109,
+534,534,534,534,534,534,534,534,534,534,109,109,109,109,109,109,
/* block 114 */
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
/* block 115 */
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
/* block 116 */
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
/* block 117 */
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
/* block 118 */
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
/* block 119 */
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
/* block 120 */
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-536,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,536,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,536,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+535,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,535,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,535,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
/* block 121 */
-537,537,537,537,537,537,537,537,536,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,109,109,109,109,109,109,109,109,109,109,109,109,
+536,536,536,536,536,536,536,536,535,536,536,536,536,536,536,536,
+536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,
+536,536,536,536,109,109,109,109,109,109,109,109,109,109,109,109,
306,306,306,306,306,306,306,306,306,306,306,306,306,306,306,306,
306,306,306,306,306,306,306,109,109,109,109,307,307,307,307,307,
307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
@@ -2479,6 +2478,16 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
307,307,307,307,307,307,307,307,307,307,307,307,109,109,109,109,
/* block 122 */
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
+
+/* block 123 */
538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
@@ -2488,16 +2497,6 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
-/* block 123 */
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-
/* block 124 */
475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,
475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,
@@ -2521,7 +2520,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
/* block 126 */
33, 33, 33, 33, 33, 33, 33,109,109,109,109,109,109,109,109,109,
109,109,109,178,178,178,178,178,109,109,109,109,109,184,181,184,
-184,184,184,184,184,184,184,184,184,540,184,184,184,184,184,184,
+184,184,184,184,184,184,184,184,184,539,184,184,184,184,184,184,
184,184,184,184,184,184,184,109,184,184,184,184,184,109,184,109,
184,184,109,184,184,109,184,184,184,184,184,184,184,184,184,184,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
@@ -2532,8 +2531,8 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
-191,191,541,541,541,541,541,541,541,541,541,541,541,541,541,541,
-541,541,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
+191,191,540,540,540,540,540,540,540,540,540,540,540,540,540,540,
+540,540,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,191,191,191,191,191,191,191,191,191,191,191,191,191,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,191,
@@ -2600,7 +2599,7 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
/* block 134 */
469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,
-469,469,469,469,469,469,469,469,469,469,469,469,469,469,542,542,
+469,469,469,469,469,469,469,469,469,469,469,469,469,469,541,541,
472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,109,
109,109,472,472,472,472,472,472,109,109,472,472,472,472,472,472,
@@ -2609,37 +2608,37 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
426,426,426,426,426,426,426,426,426, 22, 22, 22, 19, 19,109,109,
/* block 135 */
-543,543,543,543,543,543,543,543,543,543,543,543,109,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,109,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,109,543,543,109,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,109,109,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,109,109,
+542,542,542,542,542,542,542,542,542,542,542,542,109,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,109,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,109,542,542,109,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,109,109,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 136 */
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
-543,543,543,543,543,543,543,543,543,543,543,109,109,109,109,109,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
+542,542,542,542,542,542,542,542,542,542,542,109,109,109,109,109,
/* block 137 */
4, 4, 4,109,109,109,109, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23,109,109,109, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,
-544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,
-544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,
-544,544,544,544,544,545,545,545,545,546,546,546,546,546,546,546,
+543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
+543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
+543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,543,
+543,543,543,543,543,544,544,544,544,545,545,545,545,545,545,545,
/* block 138 */
-546,546,546,546,546,546,546,546,546,546,545,109,109,109,109,109,
+545,545,545,545,545,545,545,545,545,545,544,109,109,109,109,109,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -2649,49 +2648,49 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,104,109,109,
/* block 139 */
+546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
+546,546,546,546,546,546,546,546,546,546,546,546,546,109,109,109,
547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,
-547,547,547,547,547,547,547,547,547,547,547,547,547,109,109,109,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
+547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,
+547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,
+547,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 140 */
-549,549,549,549,549,549,549,549,549,549,549,549,549,549,549,549,
-549,549,549,549,549,549,549,549,549,549,549,549,549,549,549,109,
-550,550,550,550,109,109,109,109,109,109,109,109,109,109,109,109,
-551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
-551,552,551,551,551,551,551,551,551,551,552,109,109,109,109,109,
+548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
+548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,109,
+549,549,549,549,109,109,109,109,109,109,109,109,109,109,109,109,
+550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,550,
+550,551,550,550,550,550,550,550,550,550,551,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 141 */
-553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,
-553,553,553,553,553,553,553,553,553,553,553,553,553,553,109,554,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,109,109,109,109,555,555,555,555,555,555,555,555,
-556,557,557,557,557,557,109,109,109,109,109,109,109,109,109,109,
+552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
+552,552,552,552,552,552,552,552,552,552,552,552,552,552,109,553,
+554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,
+554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,
+554,554,554,554,109,109,109,109,554,554,554,554,554,554,554,554,
+555,556,556,556,556,556,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 142 */
+557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
+557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
+557,557,557,557,557,557,557,557,558,558,558,558,558,558,558,558,
558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,
558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,
-558,558,558,558,558,558,558,558,559,559,559,559,559,559,559,559,
559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
-560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,
-560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,
-560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
/* block 143 */
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,109,109,
-562,562,562,562,562,562,562,562,562,562,109,109,109,109,109,109,
+560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,560,
+560,560,560,560,560,560,560,560,560,560,560,560,560,560,109,109,
+561,561,561,561,561,561,561,561,561,561,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -2699,61 +2698,61 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 144 */
-563,563,563,563,563,563,109,109,563,109,563,563,563,563,563,563,
+562,562,562,562,562,562,109,109,562,109,562,562,562,562,562,562,
+562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,
+562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,
+562,562,562,562,562,562,109,562,562,109,109,109,562,109,109,562,
563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,
-563,563,563,563,563,563,109,563,563,109,109,109,563,109,109,563,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,109,565,566,566,566,566,566,566,566,566,
+563,563,563,563,563,563,109,564,565,565,565,565,565,565,565,565,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 145 */
-567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,
-567,567,567,567,567,567,568,568,568,568,568,568,109,109,109,569,
-570,570,570,570,570,570,570,570,570,570,570,570,570,570,570,570,
-570,570,570,570,570,570,570,570,570,570,109,109,109,109,109,571,
+566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,
+566,566,566,566,566,566,567,567,567,567,567,567,109,109,109,568,
+569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
+569,569,569,569,569,569,569,569,569,569,109,109,109,109,109,570,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 146 */
+571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
+571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
-573,573,573,573,573,573,573,573,109,109,109,109,109,109,573,573,
+572,572,572,572,572,572,572,572,109,109,109,109,109,109,572,572,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 147 */
-574,575,575,575,109,575,575,109,109,109,109,109,575,575,575,575,
-574,574,574,574,109,574,574,574,109,574,574,574,574,574,574,574,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-574,574,574,574,109,109,109,109,575,575,575,109,109,109,109,575,
-576,576,576,576,576,576,576,576,109,109,109,109,109,109,109,109,
-577,577,577,577,577,577,577,577,577,109,109,109,109,109,109,109,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,579,579,580,
+573,574,574,574,109,574,574,109,109,109,109,109,574,574,574,574,
+573,573,573,573,109,573,573,573,109,573,573,573,573,573,573,573,
+573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,573,109,109,109,109,574,574,574,109,109,109,109,574,
+575,575,575,575,575,575,575,575,109,109,109,109,109,109,109,109,
+576,576,576,576,576,576,576,576,576,109,109,109,109,109,109,109,
+577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+577,577,577,577,577,577,577,577,577,577,577,577,577,578,578,579,
/* block 148 */
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,109,109,109,582,582,582,582,582,582,582,
-583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,
-583,583,583,583,583,583,109,109,584,584,584,584,584,584,584,584,
-585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,
-585,585,585,109,109,109,109,109,586,586,586,586,586,586,586,586,
+580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
+580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
+580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
+580,580,580,580,580,580,109,109,109,581,581,581,581,581,581,581,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+582,582,582,582,582,582,109,109,583,583,583,583,583,583,583,583,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+584,584,584,109,109,109,109,109,585,585,585,585,585,585,585,585,
/* block 149 */
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,109,109,109,109,109,109,109,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -2765,103 +2764,103 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,109,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,109,
/* block 151 */
-589,590,589,591,591,591,591,591,591,591,591,591,591,591,591,591,
-591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,
-591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,
-591,591,591,591,591,591,591,591,590,590,590,590,590,590,590,590,
-590,590,590,590,590,590,590,592,592,592,592,592,592,592,109,109,
-109,109,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,594,594,594,594,594,594,594,594,594,594,
+588,589,588,590,590,590,590,590,590,590,590,590,590,590,590,590,
+590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,
+590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,
+590,590,590,590,590,590,590,590,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,591,591,591,591,591,591,591,109,109,
+109,109,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,593,593,593,593,593,593,593,593,593,593,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 152 */
-595,595,596,597,597,597,597,597,597,597,597,597,597,597,597,597,
-597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
-597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
-596,596,596,595,595,595,595,596,596,595,595,598,598,599,598,598,
-598,598,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-600,600,600,600,600,600,600,600,600,600,600,600,600,600,600,600,
-600,600,600,600,600,600,600,600,600,109,109,109,109,109,109,109,
-601,601,601,601,601,601,601,601,601,601,109,109,109,109,109,109,
+594,594,595,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+595,595,595,594,594,594,594,595,595,594,594,597,597,598,597,597,
+597,597,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
+599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
+599,599,599,599,599,599,599,599,599,109,109,109,109,109,109,109,
+600,600,600,600,600,600,600,600,600,600,109,109,109,109,109,109,
/* block 153 */
-602,602,602,603,603,603,603,603,603,603,603,603,603,603,603,603,
-603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
-603,603,603,603,603,603,603,602,602,602,602,602,604,602,602,602,
-602,602,602,602,602,109,605,605,605,605,605,605,605,605,605,605,
-606,606,606,606,109,109,109,109,109,109,109,109,109,109,109,109,
+601,601,601,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,601,601,601,601,601,603,601,601,601,
+601,601,601,601,601,109,604,604,604,604,604,604,604,604,604,604,
+605,605,605,605,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 154 */
-607,607,608,609,609,609,609,609,609,609,609,609,609,609,609,609,
-609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,
-609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,
-609,609,609,608,608,608,607,607,607,607,607,607,607,607,607,608,
-608,609,609,609,609,610,610,610,610,109,109,109,109,109,109,109,
-611,611,611,611,611,611,611,611,611,611,109,109,109,109,109,109,
+606,606,607,608,608,608,608,608,608,608,608,608,608,608,608,608,
+608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,
+608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,
+608,608,608,607,607,607,606,606,606,606,606,606,606,606,606,607,
+607,608,608,608,608,609,609,609,609,109,109,109,109,109,109,109,
+610,610,610,610,610,610,610,610,610,610,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 155 */
-612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
-612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
-612,612,612,612,612,612,612,612,612,612,612,613,614,613,614,614,
-613,613,613,613,613,613,614,613,109,109,109,109,109,109,109,109,
-615,615,615,615,615,615,615,615,615,615,109,109,109,109,109,109,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,611,611,611,611,611,612,613,612,613,613,
+612,612,612,612,612,612,613,612,109,109,109,109,109,109,109,109,
+614,614,614,614,614,614,614,614,614,614,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 156 */
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
/* block 157 */
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,109,
+109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
+
+/* block 158 */
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,109,
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-
-/* block 158 */
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,109,109,109,109,109,109,109,109,109,109,109,109,109,
-618,618,618,618,109,109,109,109,109,109,109,109,109,109,109,109,
+616,616,616,109,109,109,109,109,109,109,109,109,109,109,109,109,
+617,617,617,617,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 159 */
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
/* block 160 */
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,109,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -2889,18 +2888,18 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 163 */
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,109,109,109,109,109,109,109,109,109,109,109,
+619,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,109,109,109,109,109,109,109,109,109,109,109,
-620,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
-621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
-621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,109,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,109,
/* block 164 */
-109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,622,
-622,622,622,623,623,623,623,623,623,623,623,623,623,623,623,623,
+109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,621,
+621,621,621,622,622,622,622,622,622,622,622,622,622,622,622,622,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -2935,8 +2934,8 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,624,395,104,104,104, 19, 19, 19,395,624,624,
-624,624,624, 22, 22, 22, 22, 22, 22, 22, 22,104,104,104,104,104,
+ 19, 19, 19, 19, 19,623,395,104,104,104, 19, 19, 19,395,623,623,
+623,623,623, 22, 22, 22, 22, 22, 22, 22, 22,104,104,104,104,104,
/* block 168 */
104,104,104, 19, 19,104,104,104,104,104,104,104, 19, 19, 19, 19,
@@ -2949,11 +2948,11 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
/* block 169 */
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,625,625,625,546,109,109,109,109,109,109,109,109,109,109,
+545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
+545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
+545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
+545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,
+545,545,624,624,624,545,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
@@ -3105,11 +3104,11 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,
-109,109,109,109,109,109,626,626,626,626,626,626,626,626,626,626,
-626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+109,109,109,109,109,109,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
/* block 185 */
-627, 19, 19,109,109,109,109,109,109,109,109,109,109,109,109,109,
+626, 19, 19,109,109,109,109,109,109,109,109,109,109,109,109,109,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,109,109,109,109,109,
@@ -3279,14 +3278,14 @@ const pcre_uint16 PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,
/* block 202 */
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,539,
-539,539,539,539,539,539,539,539,539,539,539,539,539,539,109,109,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,109,109,
};
diff --git a/src/3rdparty/pcre/pcre_valid_utf8.c b/src/3rdparty/pcre/pcre_valid_utf8.c
index e5b533467d..1cf0a14710 100644
--- a/src/3rdparty/pcre/pcre_valid_utf8.c
+++ b/src/3rdparty/pcre/pcre_valid_utf8.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -92,7 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
-PCRE_UTF8_ERR22 Non-character
+PCRE_UTF8_ERR22 Unused (was non-character)
Arguments:
string points to the string
@@ -118,7 +118,6 @@ if (length < 0)
for (p = string; length-- > 0; p++)
{
register pcre_uchar ab, c, d;
- pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -187,7 +186,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
- v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -215,7 +213,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
- v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -290,14 +287,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
-
- /* Reject non-characters. The pointer p is currently at the last byte of the
- character. */
- if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
- {
- *erroroffset = (int)(p - string) - ab;
- return PCRE_UTF8_ERR22;
- }
}
#else /* Not SUPPORT_UTF */
diff --git a/src/3rdparty/pcre/pcre_xclass.c b/src/3rdparty/pcre/pcre_xclass.c
index 595cafb2aa..dce8580a3d 100644
--- a/src/3rdparty/pcre/pcre_xclass.c
+++ b/src/3rdparty/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -128,55 +128,120 @@ while ((t = *data++) != XCL_END)
else /* XCL_PROP & XCL_NOTPROP */
{
const ucd_record *prop = GET_UCD(c);
+ BOOL isprop = t == XCL_PROP;
switch(*data)
{
case PT_ANY:
- if (t == XCL_PROP) return !negated;
+ if (isprop) return !negated;
break;
case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
+ prop->chartype == ucp_Lt) == isprop) return !negated;
break;
case PT_GC:
- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
+ if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return !negated;
break;
case PT_PC:
- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == prop->chartype) == isprop) return !negated;
break;
case PT_SC:
- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == prop->script) == isprop) return !negated;
break;
case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
return !negated;
break;
- case PT_SPACE: /* Perl space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
- == (t == XCL_PROP))
- return !negated;
- break;
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
+ case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
- c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
- return !negated;
+ switch(c)
+ {
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (isprop) return !negated;
+ break;
+
+ default:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
+ return !negated;
+ break;
+ }
break;
case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
- == (t == XCL_PROP))
+ == isprop)
+ return !negated;
+ break;
+
+ case PT_UCNC:
+ if (c < 0xa0)
+ {
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT) == isprop)
+ return !negated;
+ }
+ else
+ {
+ if ((c < 0xd800 || c > 0xdfff) == isprop)
+ return !negated;
+ }
+ break;
+
+ /* The following three properties can occur only in an XCLASS, as there
+ is no \p or \P coding for them. */
+
+ /* Graphic character. Implement this as not Z (space or separator) and
+ not C (other), except for Cf (format) with a few exceptions. This seems
+ to be what Perl does. The exceptional characters are:
+
+ U+061C Arabic Letter Mark
+ U+180E Mongolian Vowel Separator
+ U+2066 - U+2069 Various "isolate"s
+ */
+
+ case PT_PXGRAPH:
+ if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
+ (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
+ (prop->chartype == ucp_Cf &&
+ c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
+ )) == isprop)
+ return !negated;
+ break;
+
+ /* Printable character: same as graphic, with the addition of Zs, i.e.
+ not Zl and not Zp, and U+180E. */
+
+ case PT_PXPRINT:
+ if ((prop->chartype != ucp_Zl &&
+ prop->chartype != ucp_Zp &&
+ (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
+ (prop->chartype == ucp_Cf &&
+ c != 0x061c && (c < 0x2066 || c > 0x2069))
+ )) == isprop)
+ return !negated;
+ break;
+
+ /* Punctuation: all Unicode punctuation, plus ASCII characters that
+ Unicode treats as symbols rather than punctuation, for Perl
+ compatibility (these are $+<=>^`|~). */
+
+ case PT_PXPUNCT:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
+ (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
return !negated;
break;
diff --git a/src/3rdparty/pcre/sljit/sljitConfig.h b/src/3rdparty/pcre/sljit/sljitConfig.h
index 68bc59d089..4f0fe4463a 100644
--- a/src/3rdparty/pcre/sljit/sljitConfig.h
+++ b/src/3rdparty/pcre/sljit/sljitConfig.h
@@ -48,6 +48,7 @@
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
+/* #define SLJIT_CONFIG_TILEGX 1 */
/* #define SLJIT_CONFIG_AUTO 1 */
/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
diff --git a/src/3rdparty/pcre/sljit/sljitConfigInternal.h b/src/3rdparty/pcre/sljit/sljitConfigInternal.h
index bc945fbcab..af455df063 100644
--- a/src/3rdparty/pcre/sljit/sljitConfigInternal.h
+++ b/src/3rdparty/pcre/sljit/sljitConfigInternal.h
@@ -63,6 +63,7 @@
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
|| (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ || (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
|| (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
|| (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
#error "An architecture must be selected"
@@ -76,6 +77,7 @@
+ (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+ (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ + (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
@@ -104,10 +106,12 @@
#define SLJIT_CONFIG_PPC_64 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
#define SLJIT_CONFIG_PPC_32 1
-#elif defined(__mips__)
+#elif defined(__mips__) && !defined(_LP64)
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__sparc__) || defined(__sparc)
#define SLJIT_CONFIG_SPARC_32 1
+#elif defined(__tilegx__)
+#define SLJIT_CONFIG_TILEGX 1
#else
/* Unsupported architecture */
#define SLJIT_CONFIG_UNSUPPORTED 1
@@ -173,9 +177,13 @@
#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */
#ifndef SLJIT_INLINE
-/* Inline functions. */
+/* Inline functions. Some old compilers do not support them. */
+#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
+#define SLJIT_INLINE
+#else
#define SLJIT_INLINE __inline
#endif
+#endif /* !SLJIT_INLINE */
#ifndef SLJIT_CONST
/* Const variables. */
@@ -266,7 +274,9 @@ typedef signed int sljit_si;
#define SLJIT_WORD_SHIFT 0
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
-#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+ && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ && !(defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
typedef unsigned int sljit_uw;
@@ -311,7 +321,7 @@ typedef double sljit_d;
/* ABI (Application Binary Interface) types. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(__APPLE__)
#define SLJIT_CALL __attribute__ ((fastcall))
#define SLJIT_X86_32_FASTCALL 1
@@ -420,6 +430,7 @@ typedef double sljit_d;
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
#endif
diff --git a/src/3rdparty/pcre/sljit/sljitExecAllocator.c b/src/3rdparty/pcre/sljit/sljitExecAllocator.c
index 75a38991d5..f24ed33797 100644
--- a/src/3rdparty/pcre/sljit/sljitExecAllocator.c
+++ b/src/3rdparty/pcre/sljit/sljitExecAllocator.c
@@ -287,3 +287,26 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
allocator_release_lock();
}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+ struct free_block* free_block;
+ struct free_block* next_free_block;
+
+ allocator_grab_lock();
+
+ free_block = free_blocks;
+ while (free_block) {
+ next_free_block = free_block->next;
+ if (!free_block->header.prev_size &&
+ AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+ total_size -= free_block->size;
+ sljit_remove_free_block(free_block);
+ free_chunk(free_block, free_block->size + sizeof(struct block_header));
+ }
+ free_block = next_free_block;
+ }
+
+ SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+ allocator_release_lock();
+}
diff --git a/src/3rdparty/pcre/sljit/sljitLir.c b/src/3rdparty/pcre/sljit/sljitLir.c
index 6979841070..53d208a69d 100644
--- a/src/3rdparty/pcre/sljit/sljitLir.c
+++ b/src/3rdparty/pcre/sljit/sljitLir.c
@@ -170,6 +170,14 @@
# define FCSR_FCC 33
#endif
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+# define IS_JAL 0x04
+# define IS_COND 0x08
+
+# define PATCH_B 0x10
+# define PATCH_J 0x20
+#endif
+
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
# define IS_MOVABLE 0x04
# define IS_COND 0x08
@@ -652,14 +660,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
}
static char* reg_names[] = {
- (char*)"<noreg>", (char*)"t1", (char*)"t2", (char*)"t3",
- (char*)"te1", (char*)"te2", (char*)"s1", (char*)"s2",
- (char*)"s3", (char*)"se1", (char*)"se2", (char*)"lcr"
+ (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3",
+ (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2",
+ (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc"
};
static char* freg_names[] = {
- (char*)"<noreg>", (char*)"float_r1", (char*)"float_r2", (char*)"float_r3",
- (char*)"float_r4", (char*)"float_r5", (char*)"float_r6"
+ (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3",
+ (char*)"f4", (char*)"f5", (char*)"f6"
};
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -736,17 +744,17 @@ static SLJIT_CONST char* op_names[] = {
};
static char* jump_names[] = {
- (char*)"c_equal", (char*)"c_not_equal",
- (char*)"c_less", (char*)"c_greater_equal",
- (char*)"c_greater", (char*)"c_less_equal",
- (char*)"c_sig_less", (char*)"c_sig_greater_equal",
- (char*)"c_sig_greater", (char*)"c_sig_less_equal",
- (char*)"c_overflow", (char*)"c_not_overflow",
- (char*)"c_mul_overflow", (char*)"c_mul_not_overflow",
- (char*)"c_float_equal", (char*)"c_float_not_equal",
- (char*)"c_float_less", (char*)"c_float_greater_equal",
- (char*)"c_float_greater", (char*)"c_float_less_equal",
- (char*)"c_float_unordered", (char*)"c_float_ordered",
+ (char*)"equal", (char*)"not_equal",
+ (char*)"less", (char*)"greater_equal",
+ (char*)"greater", (char*)"less_equal",
+ (char*)"sig_less", (char*)"sig_greater_equal",
+ (char*)"sig_greater", (char*)"sig_less_equal",
+ (char*)"overflow", (char*)"not_overflow",
+ (char*)"mul_overflow", (char*)"mul_not_overflow",
+ (char*)"float_equal", (char*)"float_not_equal",
+ (char*)"float_less", (char*)"float_greater_equal",
+ (char*)"float_greater", (char*)"float_less_equal",
+ (char*)"float_unordered", (char*)"float_ordered",
(char*)"jump", (char*)"fast_call",
(char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3"
};
@@ -993,6 +1001,12 @@ static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg)
SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS);
}
+static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg)
+{
+ SLJIT_UNUSED_ARG(reg);
+ SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS);
+}
+
static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
@@ -1104,7 +1118,7 @@ static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler,
SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
- fprintf(compiler->verbose, " jump%s<%s>\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
+ fprintf(compiler->verbose, " jump%s.%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
#endif
}
@@ -1127,7 +1141,7 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, s
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %scmp%s<%s> ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
+ fprintf(compiler->verbose, " %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
sljit_verbose_param(src1, src1w);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(src2, src2w);
@@ -1156,7 +1170,7 @@ static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler,
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %scmp%s<%s> ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
+ fprintf(compiler->verbose, " %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
!(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
sljit_verbose_fparam(src1, src1w);
fprintf(compiler->verbose, ", ");
@@ -1187,7 +1201,7 @@ static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler,
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " ijump<%s> ", jump_names[type]);
+ fprintf(compiler->verbose, " ijump.%s ", jump_names[type]);
sljit_verbose_param(src, srcw);
fprintf(compiler->verbose, "\n");
}
@@ -1223,14 +1237,14 @@ static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compil
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " op_flags<%s%s%s%s> ", !(op & SLJIT_INT_OP) ? "" : "i",
+ fprintf(compiler->verbose, " %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i",
op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
sljit_verbose_param(dst, dstw);
if (src != SLJIT_UNUSED) {
fprintf(compiler->verbose, ", ");
sljit_verbose_param(src, srcw);
}
- fprintf(compiler->verbose, ", <%s>\n", jump_names[type]);
+ fprintf(compiler->verbose, ", %s\n", jump_names[type]);
}
#endif
}
@@ -1339,6 +1353,8 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi
# include "sljitNativeMIPS_common.c"
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
# include "sljitNativeSPARC_common.c"
+#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+# include "sljitNativeTILEGX.c"
#endif
#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
diff --git a/src/3rdparty/pcre/sljit/sljitLir.h b/src/3rdparty/pcre/sljit/sljitLir.h
index 3171d1557c..920689d2ec 100644
--- a/src/3rdparty/pcre/sljit/sljitLir.h
+++ b/src/3rdparty/pcre/sljit/sljitLir.h
@@ -77,7 +77,7 @@
#endif
/* The following header file defines useful macros for fine tuning
-sljit based code generators. They are listed in the begining
+sljit based code generators. They are listed in the beginning
of sljitConfigInternal.h */
#include "sljitConfigInternal.h"
@@ -161,12 +161,14 @@ of sljitConfigInternal.h */
/* Floating point operations are performed on double or
single precision values. */
-#define SLJIT_FLOAT_REG1 1
-#define SLJIT_FLOAT_REG2 2
-#define SLJIT_FLOAT_REG3 3
-#define SLJIT_FLOAT_REG4 4
-#define SLJIT_FLOAT_REG5 5
-#define SLJIT_FLOAT_REG6 6
+#define SLJIT_FLOAT_REG1 1
+#define SLJIT_FLOAT_REG2 2
+#define SLJIT_FLOAT_REG3 3
+#define SLJIT_FLOAT_REG4 4
+#define SLJIT_FLOAT_REG5 5
+#define SLJIT_FLOAT_REG6 6
+
+#define SLJIT_NO_FLOAT_REGISTERS 6
/* --------------------------------------------------------------------- */
/* Main structures and functions */
@@ -281,6 +283,11 @@ struct sljit_compiler {
sljit_sw cache_argw;
#endif
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+ sljit_si cache_arg;
+ sljit_sw cache_argw;
+#endif
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
FILE* verbose;
#endif
@@ -306,7 +313,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void);
/* Free everything except the compiled machine code. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler);
-/* Returns the current error code. If an error is occured, future sljit
+/* Returns the current error code. If an error is occurred, future sljit
calls which uses the same compiler argument returns early with the same
error code. Thus there is no need for checking the error after every
call, it is enough to do it before the code is compiled. Removing
@@ -447,7 +454,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
sequences. This information could help to improve those code
generators which focuses only a few architectures.
- x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full adress space on x86-32)
+ x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
[reg+(reg<<imm)] is supported
[imm], -2^32+1 <= imm <= 2^32-1 is supported
Write-back is not supported
@@ -698,12 +705,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index of any SLJIT_SCRATCH
SLJIT_SAVED or SLJIT_LOCALS register.
- Note: it returns with -1 for virtual registers (all EREGs on x86-32).
- Note: register returned by SLJIT_LOCALS_REG is not necessary the real
- stack pointer register of the target architecture. */
+ Note: it returns with -1 for virtual registers (all EREGs on x86-32). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+/* The following function is a helper function for sljit_emit_op_custom.
+ It returns with the real machine register index of any SLJIT_FLOAT register.
+ Note: the index is divided by 2 on ARM 32 bit architectures. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
/* Any instruction can be inserted into the instruction stream by
sljit_emit_op_custom. It has a similar purpose as inline assembly.
The size parameter must match to the instruction size of the target
@@ -896,7 +907,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
/* --------------------------------------------------------------------- */
#define SLJIT_MAJOR_VERSION 0
-#define SLJIT_MINOR_VERSION 90
+#define SLJIT_MINOR_VERSION 91
/* Get the human readable name of the platform. Can be useful on platforms
like ARM, where ARM and Thumb2 functions can be mixed, and
diff --git a/src/3rdparty/pcre/sljit/sljitNativeARM_Thumb2.c b/src/3rdparty/pcre/sljit/sljitNativeARM_Thumb2.c
index 0a60dc2a67..74ec83177d 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeARM_Thumb2.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeARM_Thumb2.c
@@ -418,9 +418,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
- SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = compiler->size * sizeof(sljit_uh);
+ compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh);
+ SLJIT_CACHE_FLUSH(code, code_ptr);
/* Set thumb mode flag. */
return (void*)((sljit_uw)code | 0x1);
}
@@ -1526,6 +1526,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
diff --git a/src/3rdparty/pcre/sljit/sljitNativeARM_v5.c b/src/3rdparty/pcre/sljit/sljitNativeARM_v5.c
index 23a45a4c6a..e3ca3d9bb1 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeARM_v5.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeARM_v5.c
@@ -405,7 +405,6 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw
if (diff & 0x3)
return 0;
- diff >>= 2;
if (jump->flags & IS_BL) {
if (diff <= 0x01ffffff && diff >= -0x02000000) {
*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
@@ -431,7 +430,6 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw
if (diff & 0x3)
return 0;
- diff >>= 2;
if (diff <= 0x01ffffff && diff >= -0x02000000) {
code_ptr -= 2;
*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
@@ -787,9 +785,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(code_ptr - code <= (sljit_si)size);
- SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = size * sizeof(sljit_uw);
+ compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
+ SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
@@ -1991,6 +1989,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
diff --git a/src/3rdparty/pcre/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre/sljit/sljitNativeMIPS_common.c
index 9559ec32de..ede1c0bafe 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeMIPS_common.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeMIPS_common.c
@@ -30,7 +30,7 @@
SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
- return "MIPS" SLJIT_CPUINFO;
+ return "MIPS(32)" SLJIT_CPUINFO;
#else
return "MIPS III" SLJIT_CPUINFO;
#endif
@@ -398,7 +398,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = compiler->size * sizeof(sljit_ins);
+ compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
#ifndef __GNUC__
SLJIT_CACHE_FLUSH(code, code_ptr);
#else
@@ -1099,6 +1099,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg << 1;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
diff --git a/src/3rdparty/pcre/sljit/sljitNativePPC_common.c b/src/3rdparty/pcre/sljit/sljitNativePPC_common.c
index f7c75a7906..67e6898a17 100644
--- a/src/3rdparty/pcre/sljit/sljitNativePPC_common.c
+++ b/src/3rdparty/pcre/sljit/sljitNativePPC_common.c
@@ -402,9 +402,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
- SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = compiler->size * sizeof(sljit_ins);
+ compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+ SLJIT_CACHE_FLUSH(code, code_ptr);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1507,6 +1507,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
diff --git a/src/3rdparty/pcre/sljit/sljitNativeSPARC_common.c b/src/3rdparty/pcre/sljit/sljitNativeSPARC_common.c
index c6522be2a7..e5571ee6dd 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeSPARC_common.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeSPARC_common.c
@@ -35,6 +35,30 @@ typedef sljit_ui sljit_ins;
static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
{
+#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
+ __asm (
+ /* if (from == to) return */
+ "cmp %i0, %i1\n"
+ "be .leave\n"
+ "nop\n"
+
+ /* loop until from >= to */
+ ".mainloop:\n"
+ "flush %i0\n"
+ "add %i0, 8, %i0\n"
+ "cmp %i0, %i1\n"
+ "bcs .mainloop\n"
+ "nop\n"
+
+ /* The comparison was done above. */
+ "bne .leave\n"
+ /* nop is not necessary here, since the
+ sub operation has no side effect. */
+ "sub %i0, 4, %i0\n"
+ "flush %i0\n"
+ ".leave:"
+ );
+#else
if (SLJIT_UNLIKELY(from == to))
return;
@@ -49,12 +73,13 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
if (from == to) {
/* Flush the last word. */
- to --;
+ from --;
__asm__ volatile (
"flush %0\n"
- : : "r"(to)
+ : : "r"(from)
);
}
+#endif
}
/* TMP_REG2 is not used by getput_arg */
@@ -344,7 +369,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = compiler->size * sizeof(sljit_ins);
+ compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
@@ -896,6 +921,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg << 1;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
diff --git a/src/3rdparty/pcre/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre/sljit/sljitNativeX86_32.c
index 03a595bd85..2866e8f2a1 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeX86_32.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeX86_32.c
@@ -149,7 +149,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
if (saveds > 3)
locals_offset += (saveds - 3) * sizeof(sljit_uw);
compiler->locals_offset = locals_offset;
+#if defined(__APPLE__)
+ saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+ local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+#else
local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+#endif
compiler->local_size = local_size;
#ifdef _WIN32
@@ -197,7 +202,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
if (saveds > 3)
locals_offset += (saveds - 3) * sizeof(sljit_uw);
compiler->locals_offset = locals_offset;
+#if defined(__APPLE__)
+ saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+ compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+#else
compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+#endif
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
diff --git a/src/3rdparty/pcre/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
index ab98a03d2c..ceb3d675b7 100644
--- a/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
+++ b/src/3rdparty/pcre/sljit/sljitNativeX86_common.c
@@ -206,6 +206,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
+#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
@@ -267,75 +268,54 @@ static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
-#if defined(_MSC_VER) && (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-#if _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
-#else
-#error "MSVC does not support inline assembly in 64 bit mode"
#endif
-#endif /* _MSC_VER && SLJIT_CONFIG_X86_64 */
static void get_cpu_features(void)
{
sljit_ui features;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+ int CPUInfo[4];
+ __cpuid(CPUInfo, 1);
+ features = (sljit_ui)CPUInfo[3];
+
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
-#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
/* AT&T syntax. */
__asm__ (
- "pushl %%ebx\n"
"movl $0x1, %%eax\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ /* On x86-32, there is no red zone, so this
+ should work (no need for a local variable). */
+ "push %%ebx\n"
+#endif
"cpuid\n"
- "popl %%ebx\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ "pop %%ebx\n"
+#endif
"movl %%edx, %0\n"
: "=g" (features)
:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
: "%eax", "%ecx", "%edx"
- );
-#elif defined(_MSC_VER) || defined(__BORLANDC__)
- /* Intel syntax. */
- __asm {
- mov eax, 1
- push ebx
- cpuid
- pop ebx
- mov features, edx
- }
#else
-# error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
+ : "%rax", "%rbx", "%rcx", "%rdx"
#endif
-
-#else /* SLJIT_CONFIG_X86_32 */
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
- /* AT&T syntax. */
- __asm__ (
- "pushq %%rbx\n"
- "movl $0x1, %%eax\n"
- "cpuid\n"
- "popq %%rbx\n"
- "movl %%edx, %0\n"
- : "=g" (features)
- :
- : "%rax", "%rcx", "%rdx"
);
-#elif defined(_MSC_VER) && _MSC_VER >= 1400
- int CPUInfo[4];
- __cpuid(CPUInfo, 1);
- features = (sljit_ui)CPUInfo[3];
-#else
+#else /* _MSC_VER && _MSC_VER >= 1400 */
+
+ /* Intel syntax. */
__asm {
mov eax, 1
- push rbx
cpuid
- pop rbx
mov features, edx
}
-#endif
-#endif /* SLJIT_CONFIG_X86_32 */
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
cpu_has_sse2 = (features >> 26) & 0x1;
@@ -570,7 +550,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* Maybe we waste some space because of short jumps. */
SLJIT_ASSERT(code_ptr <= code + compiler->size);
compiler->error = SLJIT_ERR_COMPILED;
- compiler->executable_size = compiler->size;
+ compiler->executable_size = code_ptr - code;
return (void*)code;
}
@@ -650,9 +630,10 @@ static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
This function touches all 4k pages belongs to the requested stack space,
which size is passed in local_size. This is necessary on Windows where
the stack can only grow in 4k steps. However, this function just burn
- CPU cycles if the stack is large enough, but you don't know it in advance.
- I think this is a bad design even if it has some reasons. */
- alloca(local_size);
+ CPU cycles if the stack is large enough. However, you don't know it in
+ advance, so it must always be called. I think this is a bad design in
+ general even if it has some reasons. */
+ *(sljit_si*)alloca(local_size) = 0;
}
#endif
@@ -1785,7 +1766,7 @@ static sljit_si emit_mul(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
-static sljit_si emit_lea_binary(struct sljit_compiler *compiler,
+static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
sljit_si dst, sljit_sw dstw,
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
@@ -1794,10 +1775,12 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler,
sljit_si dst_r, done = 0;
/* These cases better be left to handled by normal way. */
- if (dst == src1 && dstw == src1w)
- return SLJIT_ERR_UNSUPPORTED;
- if (dst == src2 && dstw == src2w)
- return SLJIT_ERR_UNSUPPORTED;
+ if (!keep_flags) {
+ if (dst == src1 && dstw == src1w)
+ return SLJIT_ERR_UNSUPPORTED;
+ if (dst == src2 && dstw == src2w)
+ return SLJIT_ERR_UNSUPPORTED;
+ }
dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
@@ -2153,7 +2136,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
if (!GET_FLAGS(op)) {
- if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+ if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
else
@@ -2173,7 +2156,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB:
if (!GET_FLAGS(op)) {
- if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+ if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
else
@@ -2231,6 +2214,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+ check_sljit_get_float_register_index(reg);
+ return reg;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
@@ -2637,6 +2626,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
cond_set = get_jump_code(type) + 0x10;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) {
+ inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(4 + 3);
+ /* Set low register to conditional flag. */
+ *inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B;
+ *inst++ = GROUP_0F;
+ *inst++ = cond_set;
+ *inst++ = MOD_REG | reg_lmap[TMP_REGISTER];
+ *inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
+ *inst++ = OR_rm8_r8;
+ *inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst];
+ return SLJIT_SUCCESS;
+ }
+
reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
@@ -2717,6 +2721,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
return SLJIT_SUCCESS;
}
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) {
+ SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
+ if (dst != SLJIT_SCRATCH_REG1) {
+ inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
+ FAIL_IF(!inst);
+ INC_SIZE(1 + 3 + 2 + 1);
+ /* Set low register to conditional flag. */
+ *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+ *inst++ = GROUP_0F;
+ *inst++ = cond_set;
+ *inst++ = MOD_REG | 0 /* eax */;
+ *inst++ = OR_rm8_r8;
+ *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
+ *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+ }
+ else {
+ inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2 + 3 + 2 + 2);
+ /* Set low register to conditional flag. */
+ *inst++ = XCHG_r_rm;
+ *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
+ *inst++ = GROUP_0F;
+ *inst++ = cond_set;
+ *inst++ = MOD_REG | 1 /* ecx */;
+ *inst++ = OR_rm8_r8;
+ *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
+ *inst++ = XCHG_r_rm;
+ *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
+ }
+ return SLJIT_SUCCESS;
+ }
+
/* Set TMP_REGISTER to the bit. */
inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
FAIL_IF(!inst);
@@ -2761,16 +2798,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co
if (NOT_HALFWORD(offset)) {
FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
- SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
+ SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
return compiler->error;
#else
- return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
+ return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
}
#endif
if (offset != 0)
- return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
+ return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}
diff --git a/src/3rdparty/pcre/ucp.h b/src/3rdparty/pcre/ucp.h
index 21039106e5..d8b34bfcc5 100644
--- a/src/3rdparty/pcre/ucp.h
+++ b/src/3rdparty/pcre/ucp.h
@@ -11,7 +11,10 @@ should always be at the end of each enum, for backwards compatibility.
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
-where the equivalent property descriptive names are listed in vectors. */
+where the equivalent property descriptive names are listed in vectors.
+
+ALSO: The specific values of the first two enums are assumed for the table
+called catposstab in pcre_compile.c. */
/* These are the general character categories. */